diff options
author | Jakub Kicinski <kuba@kernel.org> | 2024-08-22 17:05:09 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2024-08-22 17:06:18 -0700 |
commit | 761d527d5d0036b98a2cecf4de1293d84d452aa1 (patch) | |
tree | 077007afc1704c75f7e733c69bfd1603b8c8e0df | |
parent | bfc52deef5ed58d92cfbe68dbceee3102617a8df (diff) | |
parent | aa0743a229366e8c1963f1b72a1c974a9d15f08f (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR.
No conflicts.
Adjacent changes:
drivers/net/ethernet/broadcom/bnxt/bnxt.h
c948c0973df5 ("bnxt_en: Don't clear ntuple filters and rss contexts during ethtool ops")
f2878cdeb754 ("bnxt_en: Add support to call FW to update a VNIC")
Link: https://patch.msgid.link/20240822210125.1542769-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
399 files changed, 4941 insertions, 2904 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 325873385b71..de725ca3be82 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -562,7 +562,8 @@ Description: Control Symmetric Multi Threading (SMT) ================ ========================================= If control status is "forceoff" or "notsupported" writes - are rejected. + are rejected. Note that enabling SMT on PowerPC skips + offline cores. What: /sys/devices/system/cpu/cpuX/power/energy_perf_bias Date: March 2019 diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst index e625830d335e..552c9155165d 100644 --- a/Documentation/admin-guide/device-mapper/dm-crypt.rst +++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst @@ -162,13 +162,14 @@ iv_large_sectors Module parameters:: -max_read_size -max_write_size - Maximum size of read or write requests. When a request larger than this size - is received, dm-crypt will split the request. The splitting improves - concurrency (the split requests could be encrypted in parallel by multiple - cores), but it also causes overhead. The user should tune these parameters to - fit the actual workload. + + max_read_size + max_write_size + Maximum size of read or write requests. When a request larger than this size + is received, dm-crypt will split the request. The splitting improves + concurrency (the split requests could be encrypted in parallel by multiple + cores), but it also causes overhead. The user should tune these parameters to + fit the actual workload. Example scripts diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 3db60a0911df..85b709257918 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -239,25 +239,33 @@ The following keys are defined: ratified in commit 98918c844281 ("Merge pull request #1217 from riscv/zawrs") of riscv-isa-manual. -* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance - information about the selected set of processors. +* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to + :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was + mistakenly classified as a bitmask rather than a value. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned - accesses is unknown. +* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`: An enum value describing + the performance of misaligned scalar native word accesses on the selected set + of processors. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are - emulated via software, either in or below the kernel. These accesses are - always extremely slow. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN`: The performance of + misaligned scalar accesses is unknown. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are slower - than equivalent byte accesses. Misaligned accesses may be supported - directly in hardware, or trapped and emulated by software. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED`: Misaligned scalar + accesses are emulated via software, either in or below the kernel. These + accesses are always extremely slow. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are faster - than equivalent byte accesses. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW`: Misaligned scalar native + word sized accesses are slower than the equivalent quantity of byte + accesses. Misaligned accesses may be supported directly in hardware, or + trapped and emulated by software. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are - not supported at all and will generate a misaligned address fault. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_FAST`: Misaligned scalar native + word sized accesses are faster than the equivalent quantity of byte + accesses. + + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED`: Misaligned scalar + accesses are not supported at all and will generate a misaligned address + fault. * :c:macro:`RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE`: An unsigned int which represents the size of the Zicboz block in bytes. diff --git a/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml index a584b4953e68..46403b98411f 100644 --- a/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Display Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm display clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml index 6b9c1d198b14..10afe984e2fb 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on MSM8994 maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml index a5a29dc75ae1..1fe68e07a2b2 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6125 maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml index 2280b859b2ad..78e232fa95dc 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml index cf19f44af774..4ff17a91344b 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6115 maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm graphics clock control module provides clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml index 374a1844a159..10a9c96a97b6 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6125 maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm graphics clock control module provides clocks and power domains on diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml index fd6658cb793d..c03b30f64f35 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Camera Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm camera clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml index 183b1c75dbdf..3cd422a645fd 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Display Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm display clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml index 147b75a21508..de4e9066eeb8 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml index cf4cad76f6c9..d9dd479c17bd 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm graphics clock control module provides clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml index 46d1d91e3a01..5c2ecec0624e 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm SM8350 Video Clock & Reset Controller maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm video clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml index 3c2cac14e6c3..d10bb002906e 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM8450 maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm graphics clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml index 8e8a288d318c..e22b4c433fd0 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm SM6375 Display MDSS maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: SM6375 MSM Mobile Display Subsystem (MDSS), which encapsulates sub-blocks diff --git a/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml b/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml index 2399cabf044c..dd614e077bbf 100644 --- a/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml +++ b/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: ASUS Z00T TM5P5 NT35596 5.5" 1080×1920 LCD Panel maintainers: - - Konrad Dybcio <konradybcio@gmail.com> + - Konrad Dybcio <konradybcio@kernel.org> description: |+ This panel seems to only be found in the Asus Z00T diff --git a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml index 87c601bcf20a..032f783eefc4 100644 --- a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml +++ b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml @@ -18,12 +18,12 @@ properties: # Samsung 13.3" FHD (1920x1080 pixels) eDP AMOLED panel - const: samsung,atna33xc20 - items: - - enum: - # Samsung 14.5" WQXGA+ (2880x1800 pixels) eDP AMOLED panel - - samsung,atna45af01 - # Samsung 14.5" 3K (2944x1840 pixels) eDP AMOLED panel - - samsung,atna45dc02 - - const: samsung,atna33xc20 + - enum: + # Samsung 14.5" WQXGA+ (2880x1800 pixels) eDP AMOLED panel + - samsung,atna45af01 + # Samsung 14.5" 3K (2944x1840 pixels) eDP AMOLED panel + - samsung,atna45dc02 + - const: samsung,atna33xc20 enable-gpios: true port: true diff --git a/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml b/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml index 191b692125e1..032a989184ff 100644 --- a/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml +++ b/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Sony TD4353 JDI 5 / 5.7" 2160x1080 MIPI-DSI Panel maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | The Sony TD4353 JDI is a 5 (XZ2c) / 5.7 (XZ2) inch 2160x1080 diff --git a/Documentation/devicetree/bindings/eeprom/at25.yaml b/Documentation/devicetree/bindings/eeprom/at25.yaml index 1715b0c9feea..c31e5e719525 100644 --- a/Documentation/devicetree/bindings/eeprom/at25.yaml +++ b/Documentation/devicetree/bindings/eeprom/at25.yaml @@ -28,6 +28,7 @@ properties: - anvo,anv32e61w - atmel,at25256B - fujitsu,mb85rs1mt + - fujitsu,mb85rs256 - fujitsu,mb85rs64 - microchip,at25160bn - microchip,25lc040 diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml index 9fce7203bd42..78210791496f 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SC7280 maintainers: - Bjorn Andersson <andersson@kernel.org> - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml index 6c2da03f0cd2..100c68636909 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SC8280XP maintainers: - Bjorn Andersson <andersson@kernel.org> - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml index 3cff7e662255..300640a533dd 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SM8450 maintainers: - Bjorn Andersson <andersson@kernel.org> - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml index 571e5746d177..f8cebc9e8cd9 100644 --- a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies legacy IOMMU implementations maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | Qualcomm "B" family devices which are not compatible with arm-smmu have diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml index bd3cbb44c99a..e75393b3d196 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. MDM9607 TLMM block maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: Top Level Mode Multiplexer pin controller in Qualcomm MDM9607 SoC. diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml index a4771f87d936..b262af6be97d 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. SM6350 TLMM block maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: Top Level Mode Multiplexer pin controller in Qualcomm SM6350 SoC. diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml index 047f82863f9b..c11af09c3f5b 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. SM6375 TLMM block maintainers: - - Konrad Dybcio <konrad.dybcio@somainline.org> + - Konrad Dybcio <konradybcio@kernel.org> description: Top Level Mode Multiplexer pin controller in Qualcomm SM6375 SoC. diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml index 7afafde17a38..61cf4fe19ca5 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml @@ -8,7 +8,7 @@ title: Qualcomm Resource Power Manager (RPM) Processor/Subsystem maintainers: - Bjorn Andersson <andersson@kernel.org> - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> - Stephan Gerhold <stephan@gerhold.net> description: | diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml index 9410404f87f1..ad2dcc39a5f5 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. (QTI) RPM Master Stats maintainers: - - Konrad Dybcio <konrad.dybcio@linaro.org> + - Konrad Dybcio <konradybcio@kernel.org> description: | The Qualcomm RPM (Resource Power Manager) architecture includes a concept diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index cc4626d6ee4f..c293f8e37468 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -75,7 +75,7 @@ Here are the main features of EROFS: - Support merging tail-end data into a special inode as fragments. - - Support large folios for uncompressed files. + - Support large folios to make use of THPs (Transparent Hugepages); - Support direct I/O on uncompressed files to avoid double caching for loop devices; diff --git a/Documentation/filesystems/smb/ksmbd.rst b/Documentation/filesystems/smb/ksmbd.rst index 6b30e43a0d11..67cb68ea6e68 100644 --- a/Documentation/filesystems/smb/ksmbd.rst +++ b/Documentation/filesystems/smb/ksmbd.rst @@ -13,7 +13,7 @@ KSMBD architecture The subset of performance related operations belong in kernelspace and the other subset which belong to operations which are not really related with performance in userspace. So, DCE/RPC management that has historically resulted -into number of buffer overflow issues and dangerous security bugs and user +into a number of buffer overflow issues and dangerous security bugs and user account management are implemented in user space as ksmbd.mountd. File operations that are related with performance (open/read/write/close etc.) in kernel space (ksmbd). This also allows for easier integration with VFS @@ -24,8 +24,8 @@ ksmbd (kernel daemon) When the server daemon is started, It starts up a forker thread (ksmbd/interface name) at initialization time and open a dedicated port 445 -for listening to SMB requests. Whenever new clients make request, Forker -thread will accept the client connection and fork a new thread for dedicated +for listening to SMB requests. Whenever new clients make a request, the Forker +thread will accept the client connection and fork a new thread for a dedicated communication channel between the client and the server. It allows for parallel processing of SMB requests(commands) from clients as well as allowing for new clients to make new connections. Each instance is named ksmbd/1~n(port number) @@ -34,12 +34,12 @@ thread can decide to pass through the commands to the user space (ksmbd.mountd), currently DCE/RPC commands are identified to be handled through the user space. To further utilize the linux kernel, it has been chosen to process the commands as workitems and to be executed in the handlers of the ksmbd-io kworker threads. -It allows for multiplexing of the handlers as the kernel take care of initiating +It allows for multiplexing of the handlers as the kernel takes care of initiating extra worker threads if the load is increased and vice versa, if the load is -decreased it destroys the extra worker threads. So, after connection is -established with client. Dedicated ksmbd/1..n(port number) takes complete +decreased it destroys the extra worker threads. So, after the connection is +established with the client. Dedicated ksmbd/1..n(port number) takes complete ownership of receiving/parsing of SMB commands. Each received command is worked -in parallel i.e., There can be multiple clients commands which are worked in +in parallel i.e., there can be multiple client commands which are worked in parallel. After receiving each command a separated kernel workitem is prepared for each command which is further queued to be handled by ksmbd-io kworkers. So, each SMB workitem is queued to the kworkers. This allows the benefit of load @@ -49,9 +49,9 @@ performance by handling client commands in parallel. ksmbd.mountd (user space daemon) -------------------------------- -ksmbd.mountd is userspace process to, transfer user account and password that +ksmbd.mountd is a userspace process to, transfer the user account and password that are registered using ksmbd.adduser (part of utils for user space). Further it -allows sharing information parameters that parsed from smb.conf to ksmbd in +allows sharing information parameters that are parsed from smb.conf to ksmbd in kernel. For the execution part it has a daemon which is continuously running and connected to the kernel interface using netlink socket, it waits for the requests (dcerpc and share/user info). It handles RPC calls (at a minimum few @@ -124,7 +124,7 @@ How to run 1. Download ksmbd-tools(https://github.com/cifsd-team/ksmbd-tools/releases) and compile them. - - Refer README(https://github.com/cifsd-team/ksmbd-tools/blob/master/README.md) + - Refer to README(https://github.com/cifsd-team/ksmbd-tools/blob/master/README.md) to know how to use ksmbd.mountd/adduser/addshare/control utils $ ./autogen.sh @@ -133,7 +133,7 @@ How to run 2. Create /usr/local/etc/ksmbd/ksmbd.conf file, add SMB share in ksmbd.conf file. - - Refer ksmbd.conf.example in ksmbd-utils, See ksmbd.conf manpage + - Refer to ksmbd.conf.example in ksmbd-utils, See ksmbd.conf manpage for details to configure shares. $ man ksmbd.conf @@ -145,7 +145,7 @@ How to run $ man ksmbd.adduser $ sudo ksmbd.adduser -a <Enter USERNAME for SMB share access> -4. Insert ksmbd.ko module after build your kernel. No need to load module +4. Insert the ksmbd.ko module after you build your kernel. No need to load the module if ksmbd is built into the kernel. - Set ksmbd in menuconfig(e.g. $ make menuconfig) @@ -175,7 +175,7 @@ Each layer 1. Enable all component prints # sudo ksmbd.control -d "all" -2. Enable one of components (smb, auth, vfs, oplock, ipc, conn, rdma) +2. Enable one of the components (smb, auth, vfs, oplock, ipc, conn, rdma) # sudo ksmbd.control -d "smb" 3. Show what prints are enabled. diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst index bb5c44f8bd1c..6dc66b4f31a7 100644 --- a/Documentation/kbuild/llvm.rst +++ b/Documentation/kbuild/llvm.rst @@ -126,7 +126,7 @@ Ccache ``ccache`` can be used with ``clang`` to improve subsequent builds, (though KBUILD_BUILD_TIMESTAMP_ should be set to a deterministic value between builds -in order to avoid 100% cache misses, see Reproducible_builds_ for more info): +in order to avoid 100% cache misses, see Reproducible_builds_ for more info):: KBUILD_BUILD_TIMESTAMP='' make LLVM=1 CC="ccache clang" diff --git a/MAINTAINERS b/MAINTAINERS index 5dbf23cf11c8..5f45b75eba96 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3504,7 +3504,9 @@ S: Maintained W: http://linux-atm.sourceforge.net F: drivers/atm/ F: include/linux/atm* +F: include/linux/sonet.h F: include/uapi/linux/atm* +F: include/uapi/linux/sonet.h ATMEL MACB ETHERNET DRIVER M: Nicolas Ferre <nicolas.ferre@microchip.com> @@ -11993,7 +11995,7 @@ F: fs/jfs/ JME NETWORK DRIVER M: Guo-Fu Tseng <cooldavid@cooldavid.org> L: netdev@vger.kernel.org -S: Maintained +S: Odd Fixes F: drivers/net/ethernet/jme.* JOURNALLING FLASH FILE SYSTEM V2 (JFFS2) @@ -15877,15 +15879,19 @@ F: drivers/net/ F: include/dt-bindings/net/ F: include/linux/cn_proc.h F: include/linux/etherdevice.h +F: include/linux/ethtool_netlink.h F: include/linux/fcdevice.h F: include/linux/fddidevice.h F: include/linux/hippidevice.h F: include/linux/if_* F: include/linux/inetdevice.h -F: include/linux/netdevice.h +F: include/linux/netdev* +F: include/linux/platform_data/wiznet.h F: include/uapi/linux/cn_proc.h +F: include/uapi/linux/ethtool_netlink.h F: include/uapi/linux/if_* -F: include/uapi/linux/netdevice.h +F: include/uapi/linux/netdev* +F: tools/testing/selftests/drivers/net/ X: drivers/net/wireless/ NETWORKING DRIVERS (WIRELESS) @@ -15936,14 +15942,28 @@ F: include/linux/framer/framer-provider.h F: include/linux/framer/framer.h F: include/linux/in.h F: include/linux/indirect_call_wrapper.h +F: include/linux/inet.h +F: include/linux/inet_diag.h F: include/linux/net.h -F: include/linux/netdevice.h -F: include/linux/skbuff.h +F: include/linux/netdev* +F: include/linux/netlink.h +F: include/linux/netpoll.h +F: include/linux/rtnetlink.h +F: include/linux/seq_file_net.h +F: include/linux/skbuff* F: include/net/ +F: include/uapi/linux/genetlink.h +F: include/uapi/linux/hsr_netlink.h F: include/uapi/linux/in.h +F: include/uapi/linux/inet_diag.h +F: include/uapi/linux/nbd-netlink.h F: include/uapi/linux/net.h F: include/uapi/linux/net_namespace.h -F: include/uapi/linux/netdevice.h +F: include/uapi/linux/netconf.h +F: include/uapi/linux/netdev* +F: include/uapi/linux/netlink.h +F: include/uapi/linux/netlink_diag.h +F: include/uapi/linux/rtnetlink.h F: lib/net_utils.c F: lib/random32.c F: net/ @@ -21055,6 +21075,7 @@ SOCKET TIMESTAMPING M: Willem de Bruijn <willemdebruijn.kernel@gmail.com> S: Maintained F: Documentation/networking/timestamping.rst +F: include/linux/net_tstamp.h F: include/uapi/linux/net_tstamp.h F: tools/testing/selftests/net/so_txtime.c @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -1963,7 +1963,7 @@ tags TAGS cscope gtags: FORCE # Protocol). PHONY += rust-analyzer rust-analyzer: - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh + +$(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh $(Q)$(MAKE) $(build)=rust $@ # Script to generate missing namespace dependencies @@ -1980,7 +1980,7 @@ nsdeps: modules quiet_cmd_gen_compile_commands = GEN $@ cmd_gen_compile_commands = $(PYTHON3) $< -a $(AR) -o $@ $(filter-out $<, $(real-prereqs)) -$(extmod_prefix)compile_commands.json: scripts/clang-tools/gen_compile_commands.py \ +$(extmod_prefix)compile_commands.json: $(srctree)/scripts/clang-tools/gen_compile_commands.py \ $(if $(KBUILD_EXTMOD),, vmlinux.a $(KBUILD_VMLINUX_LIBS)) \ $(if $(CONFIG_MODULES), $(MODORDER)) FORCE $(call if_changed,gen_compile_commands) diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index c30df1097c52..9f7454b8efa7 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -1109,7 +1109,7 @@ void ecard_remove_driver(struct ecard_driver *drv) driver_unregister(&drv->drv); } -static int ecard_match(struct device *_dev, struct device_driver *_drv) +static int ecard_match(struct device *_dev, const struct device_driver *_drv) { struct expansion_card *ec = ECARD_DEV(_dev); struct ecard_driver *drv = ECARD_DRV(_drv); diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 28f665e0975a..1aa4ecb73429 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -188,7 +188,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) #define __get_mem_asm(load, reg, x, addr, label, type) \ asm_goto_output( \ "1: " load " " reg "0, [%1]\n" \ - _ASM_EXTABLE_##type##ACCESS_ERR(1b, %l2, %w0) \ + _ASM_EXTABLE_##type##ACCESS(1b, %l2) \ : "=r" (x) \ : "r" (addr) : : label) #else diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index 0c036a9a3c33..2465f291c7e1 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -27,7 +27,7 @@ #include <asm/numa.h> -static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; int __init acpi_numa_get_nid(unsigned int cpu) { diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index a096e2451044..b22d28ec8028 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -355,9 +355,6 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) smp_init_cpus(); smp_build_mpidr_hash(); - /* Init percpu seeds for random tags after cpus are set up. */ - kasan_init_sw_tags(); - #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Make sure init_thread_info.ttbr0 always generates translation diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5e18fbcee9a2..f01f0fd7b7fe 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -467,6 +467,8 @@ void __init smp_prepare_boot_cpu(void) init_gic_priority_masking(); kasan_init_hw_tags(); + /* Init percpu seeds for random tags after cpus are set up. */ + kasan_init_sw_tags(); } /* diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c index 2738325e98dd..d20eec742bfa 100644 --- a/arch/mips/sgi-ip22/ip22-gio.c +++ b/arch/mips/sgi-ip22/ip22-gio.c @@ -111,7 +111,7 @@ void gio_device_unregister(struct gio_device *giodev) } EXPORT_SYMBOL_GPL(gio_device_unregister); -static int gio_bus_match(struct device *dev, struct device_driver *drv) +static int gio_bus_match(struct device *dev, const struct device_driver *drv) { struct gio_device *gio_dev = to_gio_device(dev); struct gio_driver *gio_drv = to_gio_driver(drv); diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index f4e6f2dd04b7..16bacfe8c7a2 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -145,6 +145,7 @@ static inline int cpu_to_coregroup_id(int cpu) #ifdef CONFIG_HOTPLUG_SMT #include <linux/cpu_smt.h> +#include <linux/cpumask.h> #include <asm/cputhreads.h> static inline bool topology_is_primary_thread(unsigned int cpu) @@ -156,6 +157,18 @@ static inline bool topology_smt_thread_allowed(unsigned int cpu) { return cpu_thread_in_core(cpu) < cpu_smt_num_threads; } + +#define topology_is_core_online topology_is_core_online +static inline bool topology_is_core_online(unsigned int cpu) +{ + int i, first_cpu = cpu_first_thread_sibling(cpu); + + for (i = first_cpu; i < first_cpu + threads_per_core; ++i) { + if (cpu_online(i)) + return true; + } + return false; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4bd2f87616ba..943430077375 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -959,6 +959,7 @@ void __init setup_arch(char **cmdline_p) mem_topology_setup(); /* Set max_mapnr before paging_init() */ set_max_mapnr(max_pfn); + high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* * Release secondary cpus out of their spinloops at 0x60 now that diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 9b4a675eb8f8..2978fcbe307e 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -73,7 +73,7 @@ void setup_kup(void) #define CTOR(shift) static void ctor_##shift(void *addr) \ { \ - memset(addr, 0, sizeof(void *) << (shift)); \ + memset(addr, 0, sizeof(pgd_t) << (shift)); \ } CTOR(0); CTOR(1); CTOR(2); CTOR(3); CTOR(4); CTOR(5); CTOR(6); CTOR(7); @@ -117,7 +117,7 @@ EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */ void pgtable_cache_add(unsigned int shift) { char *name; - unsigned long table_size = sizeof(void *) << shift; + unsigned long table_size = sizeof(pgd_t) << shift; unsigned long align = table_size; /* When batching pgtable pointers for RCU freeing, we store diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d325217ab201..da21cb018984 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -290,8 +290,6 @@ void __init mem_init(void) swiotlb_init(ppc_swiotlb_enable, ppc_swiotlb_flags); #endif - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - kasan_late_init(); memblock_free_all(); diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index ef01c182af2b..ffb9484531af 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,7 +8,7 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 8 +#define RISCV_HWPROBE_MAX_KEY 9 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index b706c8e47b02..1e153cda57db 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -82,6 +82,12 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6 #define RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS 7 #define RISCV_HWPROBE_KEY_TIME_CSR_FREQ 8 +#define RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF 9 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN 0 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED 1 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW 2 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_FAST 3 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED 4 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Flags */ diff --git a/arch/riscv/kernel/acpi_numa.c b/arch/riscv/kernel/acpi_numa.c index 0231482d6946..ff95aeebee3e 100644 --- a/arch/riscv/kernel/acpi_numa.c +++ b/arch/riscv/kernel/acpi_numa.c @@ -28,7 +28,7 @@ #include <asm/numa.h> -static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; int __init acpi_numa_get_nid(unsigned int cpu) { diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 69e5796fc51f..34ef522f07a8 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -205,6 +205,8 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len) int ret; ret = patch_insn_set(addr, c, len); + if (!ret) + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } @@ -239,6 +241,8 @@ int patch_text_nosync(void *addr, const void *insns, size_t len) int ret; ret = patch_insn_write(addr, insns, len); + if (!ret) + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 8d1b5c35d2a7..cea0ca2bf2a2 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -178,13 +178,13 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) perf = this_perf; if (perf != this_perf) { - perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + perf = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; break; } } if (perf == -1ULL) - return RISCV_HWPROBE_MISALIGNED_UNKNOWN; + return RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; return perf; } @@ -192,12 +192,12 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) static u64 hwprobe_misaligned(const struct cpumask *cpus) { if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS)) - return RISCV_HWPROBE_MISALIGNED_FAST; + return RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available()) - return RISCV_HWPROBE_MISALIGNED_EMULATED; + return RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; - return RISCV_HWPROBE_MISALIGNED_SLOW; + return RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; } #endif @@ -225,6 +225,7 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, break; case RISCV_HWPROBE_KEY_CPUPERF_0: + case RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF: pair->value = hwprobe_misaligned(cpus); break; diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 05a16b1f0aee..51ebfd23e007 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -319,6 +319,7 @@ void do_trap_ecall_u(struct pt_regs *regs) regs->epc += 4; regs->orig_a0 = regs->a0; + regs->a0 = -ENOSYS; riscv_v_vstate_discard(regs); @@ -328,8 +329,7 @@ void do_trap_ecall_u(struct pt_regs *regs) if (syscall >= 0 && syscall < NR_syscalls) syscall_handler(regs, syscall); - else if (syscall != -1) - regs->a0 = -ENOSYS; + /* * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), * so the maximum stack offset is 1k bytes (10 bits). diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index b62d5a2f4541..192cd5603e95 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -338,7 +338,7 @@ int handle_misaligned_load(struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS - *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED; + *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; #endif if (!unaligned_enabled) @@ -532,13 +532,13 @@ static bool check_unaligned_access_emulated(int cpu) unsigned long tmp_var, tmp_val; bool misaligned_emu_detected; - *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + *mas_ptr = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; __asm__ __volatile__ ( " "REG_L" %[tmp], 1(%[ptr])\n" : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); - misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED); + misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED); /* * If unaligned_ctl is already set, this means that we detected that all * CPUS uses emulated misaligned access at boot time. If that changed diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index a9a6bcb02acf..160628a2116d 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -34,9 +34,9 @@ static int check_unaligned_access(void *param) struct page *page = param; void *dst; void *src; - long speed = RISCV_HWPROBE_MISALIGNED_SLOW; + long speed = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) return 0; /* Make an unaligned destination buffer. */ @@ -95,14 +95,14 @@ static int check_unaligned_access(void *param) } if (word_cycles < byte_cycles) - speed = RISCV_HWPROBE_MISALIGNED_FAST; + speed = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; ratio = div_u64((byte_cycles * 100), word_cycles); pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", cpu, ratio / 100, ratio % 100, - (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); + (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) ? "fast" : "slow"); per_cpu(misaligned_access_speed, cpu) = speed; @@ -110,7 +110,7 @@ static int check_unaligned_access(void *param) * Set the value of fast_misaligned_access of a CPU. These operations * are atomic to avoid race conditions. */ - if (speed == RISCV_HWPROBE_MISALIGNED_FAST) + if (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) cpumask_set_cpu(cpu, &fast_misaligned_access); else cpumask_clear_cpu(cpu, &fast_misaligned_access); @@ -188,7 +188,7 @@ static int riscv_online_cpu(unsigned int cpu) static struct page *buf; /* We are already set since the last check */ - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) goto exit; buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c index b6c1e7b5d34b..a8126d118341 100644 --- a/arch/riscv/kernel/vendor_extensions.c +++ b/arch/riscv/kernel/vendor_extensions.c @@ -38,7 +38,7 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES case ANDES_VENDOR_ID: bmap = &riscv_isa_vendor_ext_list_andes.all_harts_isa_bitmap; - cpu_bmap = &riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap[cpu]; + cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap; break; #endif default: diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 8b698d9609e7..eb0649a61b4c 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -927,7 +927,7 @@ static void __init create_kernel_page_table(pgd_t *pgdir, PMD_SIZE, PAGE_KERNEL_EXEC); /* Map the data in RAM */ - end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size; + end_va = kernel_map.virt_addr + kernel_map.size; for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE) create_pgd_mapping(pgdir, va, kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)), @@ -1096,7 +1096,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) phys_ram_base = CONFIG_PHYS_RAM_BASE; kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE; - kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata); + kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start); kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom; #else diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index cc57e2dd9a0b..2cafcf11ee8b 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -38,6 +38,7 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags, void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { unsigned int users; + unsigned long flags; struct blk_mq_tags *tags = hctx->tags; /* @@ -56,11 +57,11 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) return; } - spin_lock_irq(&tags->lock); + spin_lock_irqsave(&tags->lock, flags); users = tags->active_queues + 1; WRITE_ONCE(tags->active_queues, users); blk_mq_update_wake_batch(tags, users); - spin_unlock_irq(&tags->lock); + spin_unlock_irqrestore(&tags->lock, flags); } /* diff --git a/drivers/accessibility/speakup/genmap.c b/drivers/accessibility/speakup/genmap.c index 0125000e00d9..0882bab10fb8 100644 --- a/drivers/accessibility/speakup/genmap.c +++ b/drivers/accessibility/speakup/genmap.c @@ -10,7 +10,6 @@ #include <stdio.h> #include <libgen.h> #include <string.h> -#include <linux/version.h> #include <ctype.h> #include "utils.h" diff --git a/drivers/accessibility/speakup/makemapdata.c b/drivers/accessibility/speakup/makemapdata.c index d7d41bb9b05f..55e4ef8a93dc 100644 --- a/drivers/accessibility/speakup/makemapdata.c +++ b/drivers/accessibility/speakup/makemapdata.c @@ -10,7 +10,6 @@ #include <stdio.h> #include <libgen.h> #include <string.h> -#include <linux/version.h> #include <ctype.h> #include "utils.h" diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h index 2133085deda7..1c5218b79fc2 100644 --- a/drivers/acpi/acpica/acevents.h +++ b/drivers/acpi/acpica/acevents.h @@ -188,13 +188,9 @@ acpi_ev_detach_region(union acpi_operand_object *region_obj, u8 acpi_ns_is_locked); void -acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, +acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, u32 max_depth, acpi_adr_space_type space_id, u32 function); -void -acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *node, - acpi_adr_space_type space_id); - acpi_status acpi_ev_execute_reg_method(union acpi_operand_object *region_obj, u32 function); diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index dc6004daf624..cf53b9535f18 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -20,6 +20,10 @@ extern u8 acpi_gbl_default_address_spaces[]; /* Local prototypes */ +static void +acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *device_node, + acpi_adr_space_type space_id); + static acpi_status acpi_ev_reg_run(acpi_handle obj_handle, u32 level, void *context, void **return_value); @@ -61,6 +65,7 @@ acpi_status acpi_ev_initialize_op_regions(void) acpi_gbl_default_address_spaces [i])) { acpi_ev_execute_reg_methods(acpi_gbl_root_node, + ACPI_UINT32_MAX, acpi_gbl_default_address_spaces [i], ACPI_REG_CONNECT); } @@ -668,6 +673,7 @@ cleanup1: * FUNCTION: acpi_ev_execute_reg_methods * * PARAMETERS: node - Namespace node for the device + * max_depth - Depth to which search for _REG * space_id - The address space ID * function - Passed to _REG: On (1) or Off (0) * @@ -679,7 +685,7 @@ cleanup1: ******************************************************************************/ void -acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, +acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, u32 max_depth, acpi_adr_space_type space_id, u32 function) { struct acpi_reg_walk_info info; @@ -713,7 +719,7 @@ acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, * regions and _REG methods. (i.e. handlers must be installed for all * regions of this Space ID before we can run any _REG methods) */ - (void)acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, ACPI_UINT32_MAX, + (void)acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, max_depth, ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run, NULL, &info, NULL); @@ -814,7 +820,7 @@ acpi_ev_reg_run(acpi_handle obj_handle, * ******************************************************************************/ -void +static void acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *device_node, acpi_adr_space_type space_id) { diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index 624361a5f34d..95f78383bbdb 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -85,7 +85,8 @@ acpi_install_address_space_handler_internal(acpi_handle device, /* Run all _REG methods for this address space */ if (run_reg) { - acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + acpi_ev_execute_reg_methods(node, ACPI_UINT32_MAX, space_id, + ACPI_REG_CONNECT); } unlock_and_exit: @@ -263,6 +264,7 @@ ACPI_EXPORT_SYMBOL(acpi_remove_address_space_handler) * FUNCTION: acpi_execute_reg_methods * * PARAMETERS: device - Handle for the device + * max_depth - Depth to which search for _REG * space_id - The address space ID * * RETURN: Status @@ -271,7 +273,8 @@ ACPI_EXPORT_SYMBOL(acpi_remove_address_space_handler) * ******************************************************************************/ acpi_status -acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) +acpi_execute_reg_methods(acpi_handle device, u32 max_depth, + acpi_adr_space_type space_id) { struct acpi_namespace_node *node; acpi_status status; @@ -296,7 +299,8 @@ acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) /* Run all _REG methods for this address space */ - acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + acpi_ev_execute_reg_methods(node, max_depth, space_id, + ACPI_REG_CONNECT); } else { status = AE_BAD_PARAMETER; } @@ -306,57 +310,3 @@ acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) } ACPI_EXPORT_SYMBOL(acpi_execute_reg_methods) - -/******************************************************************************* - * - * FUNCTION: acpi_execute_orphan_reg_method - * - * PARAMETERS: device - Handle for the device - * space_id - The address space ID - * - * RETURN: Status - * - * DESCRIPTION: Execute an "orphan" _REG method that appears under an ACPI - * device. This is a _REG method that has no corresponding region - * within the device's scope. - * - ******************************************************************************/ -acpi_status -acpi_execute_orphan_reg_method(acpi_handle device, acpi_adr_space_type space_id) -{ - struct acpi_namespace_node *node; - acpi_status status; - - ACPI_FUNCTION_TRACE(acpi_execute_orphan_reg_method); - - /* Parameter validation */ - - if (!device) { - return_ACPI_STATUS(AE_BAD_PARAMETER); - } - - status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - /* Convert and validate the device handle */ - - node = acpi_ns_validate_handle(device); - if (node) { - - /* - * If an "orphan" _REG method is present in the device's scope - * for the given address space ID, run it. - */ - - acpi_ev_execute_orphan_reg_method(node, space_id); - } else { - status = AE_BAD_PARAMETER; - } - - (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); - return_ACPI_STATUS(status); -} - -ACPI_EXPORT_SYMBOL(acpi_execute_orphan_reg_method) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 299ec653388c..38d2f6e6b12b 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1487,12 +1487,13 @@ static bool install_gpio_irq_event_handler(struct acpi_ec *ec) static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, bool call_reg) { - acpi_handle scope_handle = ec == first_ec ? ACPI_ROOT_OBJECT : ec->handle; acpi_status status; acpi_ec_start(ec, false); if (!test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) { + acpi_handle scope_handle = ec == first_ec ? ACPI_ROOT_OBJECT : ec->handle; + acpi_ec_enter_noirq(ec); status = acpi_install_address_space_handler_no_reg(scope_handle, ACPI_ADR_SPACE_EC, @@ -1506,10 +1507,7 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, } if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) { - acpi_execute_reg_methods(scope_handle, ACPI_ADR_SPACE_EC); - if (scope_handle != ec->handle) - acpi_execute_orphan_reg_method(ec->handle, ACPI_ADR_SPACE_EC); - + acpi_execute_reg_methods(ec->handle, ACPI_UINT32_MAX, ACPI_ADR_SPACE_EC); set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags); } @@ -1724,6 +1722,12 @@ static void acpi_ec_remove(struct acpi_device *device) } } +void acpi_ec_register_opregions(struct acpi_device *adev) +{ + if (first_ec && first_ec->handle != adev->handle) + acpi_execute_reg_methods(adev->handle, 1, ACPI_ADR_SPACE_EC); +} + static acpi_status ec_parse_io_ports(struct acpi_resource *resource, void *context) { diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 601b670356e5..aadd4c218b32 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -223,6 +223,7 @@ int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, acpi_handle handle, acpi_ec_query_func func, void *data); void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit); +void acpi_ec_register_opregions(struct acpi_device *adev); #ifdef CONFIG_PM_SLEEP void acpi_ec_flush_work(void); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 59771412686b..22ae7829a915 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2273,6 +2273,8 @@ static int acpi_bus_attach(struct acpi_device *device, void *first_pass) if (device->handler) goto ok; + acpi_ec_register_opregions(device); + if (!device->flags.initialized) { device->flags.power_manageable = device->power.states[ACPI_STATE_D0].flags.valid; diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 2ebc970e6573..7d5e4de64e3c 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2945,9 +2945,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) INTEL_ROM_LEGACY_NO_WBS_SUPPORT)) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22) - set_bit(HCI_QUIRK_VALID_LE_STATES, - &hdev->quirks); err = btintel_legacy_rom_setup(hdev, &ver); break; @@ -2956,7 +2953,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) case 0x12: /* ThP */ case 0x13: /* HrP */ case 0x14: /* CcP */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); fallthrough; case 0x0c: /* WsP */ /* Apply the device specific HCI quirks @@ -3048,9 +3044,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) /* These variants don't seem to support LE Coded PHY */ set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks); - /* Set Valid LE States quirk */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, ver.hw_variant); @@ -3076,9 +3069,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) */ set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - /* Apply LE States quirk from solar onwards */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, INTEL_HW_VARIANT(ver_tlv.cnvi_bt)); diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 0d1a0415557b..1c7631f22c52 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1180,9 +1180,6 @@ static int btintel_pcie_setup(struct hci_dev *hdev) */ set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - /* Apply LE States quirk from solar onwards */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, INTEL_HW_VARIANT(ver_tlv.cnvi_bt)); diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index 39d6898497a4..497e4c87f5be 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -1148,9 +1148,6 @@ static int btmtksdio_setup(struct hci_dev *hdev) } } - /* Valid LE States quirk for MediaTek 7921 */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - break; case 0x7663: case 0x7668: diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index f2f37143c454..fd7991ea7672 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -1287,7 +1287,6 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev) case CHIP_ID_8852C: case CHIP_ID_8851B: case CHIP_ID_8852BT: - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); /* RTL8852C needs to transmit mSBC data continuously without diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index acdba5d77694..51d9d4532dda 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3956,8 +3956,8 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_WIDEBAND_SPEECH) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - if (id->driver_info & BTUSB_VALID_LE_STATES) - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); + if (!(id->driver_info & BTUSB_VALID_LE_STATES)) + set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks); if (id->driver_info & BTUSB_DIGIANSWER) { data->cmdreq_type = USB_TYPE_VENDOR; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 45adc1560d94..4b1ad7ea5b95 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2474,8 +2474,8 @@ static int qca_serdev_probe(struct serdev_device *serdev) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - if (data->capabilities & QCA_CAP_VALID_LE_STATES) - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); + if (!(data->capabilities & QCA_CAP_VALID_LE_STATES)) + set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks); } return 0; diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index c4046f8f1985..43e9ac5a3324 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -425,8 +425,6 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) if (opcode & 0x80) set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); hci_free_dev(hdev); diff --git a/drivers/char/xillybus/xillyusb.c b/drivers/char/xillybus/xillyusb.c index 5a5afa14ca8c..45771b1a3716 100644 --- a/drivers/char/xillybus/xillyusb.c +++ b/drivers/char/xillybus/xillyusb.c @@ -50,6 +50,7 @@ MODULE_LICENSE("GPL v2"); static const char xillyname[] = "xillyusb"; static unsigned int fifo_buf_order; +static struct workqueue_struct *wakeup_wq; #define USB_VENDOR_ID_XILINX 0x03fd #define USB_VENDOR_ID_ALTERA 0x09fb @@ -569,10 +570,6 @@ static void cleanup_dev(struct kref *kref) * errors if executed. The mechanism relies on that xdev->error is assigned * a non-zero value by report_io_error() prior to queueing wakeup_all(), * which prevents bulk_in_work() from calling process_bulk_in(). - * - * The fact that wakeup_all() and bulk_in_work() are queued on the same - * workqueue makes their concurrent execution very unlikely, however the - * kernel's API doesn't seem to ensure this strictly. */ static void wakeup_all(struct work_struct *work) @@ -627,7 +624,7 @@ static void report_io_error(struct xillyusb_dev *xdev, if (do_once) { kref_get(&xdev->kref); /* xdev is used by work item */ - queue_work(xdev->workq, &xdev->wakeup_workitem); + queue_work(wakeup_wq, &xdev->wakeup_workitem); } } @@ -1906,6 +1903,13 @@ static const struct file_operations xillyusb_fops = { static int xillyusb_setup_base_eps(struct xillyusb_dev *xdev) { + struct usb_device *udev = xdev->udev; + + /* Verify that device has the two fundamental bulk in/out endpoints */ + if (usb_pipe_type_check(udev, usb_sndbulkpipe(udev, MSG_EP_NUM)) || + usb_pipe_type_check(udev, usb_rcvbulkpipe(udev, IN_EP_NUM))) + return -ENODEV; + xdev->msg_ep = endpoint_alloc(xdev, MSG_EP_NUM | USB_DIR_OUT, bulk_out_work, 1, 2); if (!xdev->msg_ep) @@ -1935,14 +1939,15 @@ static int setup_channels(struct xillyusb_dev *xdev, __le16 *chandesc, int num_channels) { - struct xillyusb_channel *chan; + struct usb_device *udev = xdev->udev; + struct xillyusb_channel *chan, *new_channels; int i; chan = kcalloc(num_channels, sizeof(*chan), GFP_KERNEL); if (!chan) return -ENOMEM; - xdev->channels = chan; + new_channels = chan; for (i = 0; i < num_channels; i++, chan++) { unsigned int in_desc = le16_to_cpu(*chandesc++); @@ -1971,6 +1976,15 @@ static int setup_channels(struct xillyusb_dev *xdev, */ if ((out_desc & 0x80) && i < 14) { /* Entry is valid */ + if (usb_pipe_type_check(udev, + usb_sndbulkpipe(udev, i + 2))) { + dev_err(xdev->dev, + "Missing BULK OUT endpoint %d\n", + i + 2); + kfree(new_channels); + return -ENODEV; + } + chan->writable = 1; chan->out_synchronous = !!(out_desc & 0x40); chan->out_seekable = !!(out_desc & 0x20); @@ -1980,6 +1994,7 @@ static int setup_channels(struct xillyusb_dev *xdev, } } + xdev->channels = new_channels; return 0; } @@ -2096,9 +2111,11 @@ static int xillyusb_discovery(struct usb_interface *interface) * just after responding with the IDT, there is no reason for any * work item to be running now. To be sure that xdev->channels * is updated on anything that might run in parallel, flush the - * workqueue, which rarely does anything. + * device's workqueue and the wakeup work item. This rarely + * does anything. */ flush_workqueue(xdev->workq); + flush_work(&xdev->wakeup_workitem); xdev->num_channels = num_channels; @@ -2258,6 +2275,10 @@ static int __init xillyusb_init(void) { int rc = 0; + wakeup_wq = alloc_workqueue(xillyname, 0, 0); + if (!wakeup_wq) + return -ENOMEM; + if (LOG2_INITIAL_FIFO_BUF_SIZE > PAGE_SHIFT) fifo_buf_order = LOG2_INITIAL_FIFO_BUF_SIZE - PAGE_SHIFT; else @@ -2265,12 +2286,17 @@ static int __init xillyusb_init(void) rc = usb_register(&xillyusb_driver); + if (rc) + destroy_workqueue(wakeup_wq); + return rc; } static void __exit xillyusb_exit(void) { usb_deregister(&xillyusb_driver); + + destroy_workqueue(wakeup_wq); } module_init(xillyusb_init); diff --git a/drivers/clk/thead/clk-th1520-ap.c b/drivers/clk/thead/clk-th1520-ap.c index cbc176b27c09..17e32ae08720 100644 --- a/drivers/clk/thead/clk-th1520-ap.c +++ b/drivers/clk/thead/clk-th1520-ap.c @@ -738,7 +738,7 @@ static struct ccu_div vp_axi_clk = { .hw.init = CLK_HW_INIT_PARENTS_HW("vp-axi", video_pll_clk_parent, &ccu_div_ops, - 0), + CLK_IGNORE_UNUSED), }, }; diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index a663e7566c48..51132a575b27 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -834,11 +834,13 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport) void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) { struct device *dport_dev = dport->dport_dev; - struct pci_host_bridge *host_bridge; - host_bridge = to_pci_host_bridge(dport_dev); - if (host_bridge->native_aer) - dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + if (dport->rch) { + struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport_dev); + + if (host_bridge->native_aer) + dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + } dport->reg_map.host = host; cxl_dport_map_regs(dport); diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index d5906d419b0a..10ea71273c89 100644 --- a/drivers/gpio/gpio-mlxbf3.c +++ b/drivers/gpio/gpio-mlxbf3.c @@ -39,6 +39,8 @@ #define MLXBF_GPIO_CAUSE_OR_EVTEN0 0x14 #define MLXBF_GPIO_CAUSE_OR_CLRCAUSE 0x18 +#define MLXBF_GPIO_CLR_ALL_INTS GENMASK(31, 0) + struct mlxbf3_gpio_context { struct gpio_chip gc; @@ -82,6 +84,8 @@ static void mlxbf3_gpio_irq_disable(struct irq_data *irqd) val = readl(gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); val &= ~BIT(offset); writel(val, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); + + writel(BIT(offset), gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE); raw_spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags); gpiochip_disable_irq(gc, offset); @@ -253,6 +257,15 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) return 0; } +static void mlxbf3_gpio_shutdown(struct platform_device *pdev) +{ + struct mlxbf3_gpio_context *gs = platform_get_drvdata(pdev); + + /* Disable and clear all interrupts */ + writel(0, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); + writel(MLXBF_GPIO_CLR_ALL_INTS, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE); +} + static const struct acpi_device_id mlxbf3_gpio_acpi_match[] = { { "MLNXBF33", 0 }, {} @@ -265,6 +278,7 @@ static struct platform_driver mlxbf3_gpio_driver = { .acpi_match_table = mlxbf3_gpio_acpi_match, }, .probe = mlxbf3_gpio_probe, + .shutdown = mlxbf3_gpio_shutdown, }; module_platform_driver(mlxbf3_gpio_driver); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9aa952f258cf..6dfdff58bffd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1057,6 +1057,9 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, r = amdgpu_ring_parse_cs(ring, p, job, ib); if (r) return r; + + if (ib->sa_bo) + ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); } else { ib->ptr = (uint32_t *)kptr; r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 5cb33ac99f70..c43d1b6e5d66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -685,16 +685,24 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id); args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_free(fpriv, id); break; case AMDGPU_CTX_OP_QUERY_STATE: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_QUERY_STATE2: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_GET_STABLE_PSTATE: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 82452606ae6c..c770cb201e64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -509,6 +509,16 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.compute_ring[j], + RESET_QUEUES, 0, 0); + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -551,6 +561,18 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + j = i + xcc_id * adev->gfx.num_gfx_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.gfx_ring[j], + PREEMPT_QUEUES, 0, 0); + } + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -995,7 +1017,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ if (amdgpu_device_skip_hw_access(adev)) return 0; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) return amdgpu_mes_rreg(adev, reg); BUG_ON(!ring->funcs->emit_rreg); @@ -1065,7 +1087,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 if (amdgpu_device_skip_hw_access(adev)) return; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_wreg(adev, reg, v); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c02659025656..b49b3650fd62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -589,7 +589,8 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) ring = adev->rings[i]; vmhub = ring->vm_hub; - if (ring == &adev->mes.ring || + if (ring == &adev->mes.ring[0] || + ring == &adev->mes.ring[1] || ring == &adev->umsch_mm.ring) continue; @@ -761,7 +762,7 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, unsigned long flags; uint32_t seq; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, ref, mask); return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index dac88d2dd70d..1cb1ec7beefe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -135,9 +135,11 @@ int amdgpu_mes_init(struct amdgpu_device *adev) idr_init(&adev->mes.queue_id_idr); ida_init(&adev->mes.doorbell_ida); spin_lock_init(&adev->mes.queue_id_lock); - spin_lock_init(&adev->mes.ring_lock); mutex_init(&adev->mes.mutex_hidden); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) + spin_lock_init(&adev->mes.ring_lock[i]); + adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; adev->mes.vmid_mask_mmhub = 0xffffff00; adev->mes.vmid_mask_gfxhub = 0xffffff00; @@ -163,36 +165,38 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.sdma_hqd_mask[i] = 0xfc; } - r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs); - if (r) { - dev_err(adev->dev, - "(%d) ring trail_fence_offs wb alloc failed\n", r); - goto error_ids; - } - adev->mes.sch_ctx_gpu_addr = - adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4); - adev->mes.sch_ctx_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs]; + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) ring trail_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.sch_ctx_gpu_addr[i] = + adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4); + adev->mes.sch_ctx_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]]; - r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs); - if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - dev_err(adev->dev, - "(%d) query_status_fence_offs wb alloc failed\n", r); - goto error_ids; + r = amdgpu_device_wb_get(adev, + &adev->mes.query_status_fence_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) query_status_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr + + (adev->mes.query_status_fence_offs[i] * 4); + adev->mes.query_status_fence_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]]; } - adev->mes.query_status_fence_gpu_addr = - adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4); - adev->mes.query_status_fence_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs]; r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs); if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); dev_err(adev->dev, "(%d) read_val_offs alloc failed\n", r); - goto error_ids; + goto error; } adev->mes.read_val_gpu_addr = adev->wb.gpu_addr + (adev->mes.read_val_offs * 4); @@ -212,10 +216,16 @@ int amdgpu_mes_init(struct amdgpu_device *adev) error_doorbell: amdgpu_mes_doorbell_free(adev); error: - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); -error_ids: + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); idr_destroy(&adev->mes.queue_id_idr); @@ -226,13 +236,22 @@ error_ids: void amdgpu_mes_fini(struct amdgpu_device *adev) { + int i; + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + amdgpu_mes_doorbell_free(adev); idr_destroy(&adev->mes.pasid_idr); @@ -1499,7 +1518,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - if (adev->enable_uni_mes && pipe == AMDGPU_MES_SCHED_PIPE) { + if (adev->enable_uni_mes) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_uni_mes.bin", ucode_prefix); } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 2d659c612f03..0bc837dab578 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -82,8 +82,8 @@ struct amdgpu_mes { uint64_t default_process_quantum; uint64_t default_gang_quantum; - struct amdgpu_ring ring; - spinlock_t ring_lock; + struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES]; + spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES]; const struct firmware *fw[AMDGPU_MAX_MES_PIPES]; @@ -112,12 +112,12 @@ struct amdgpu_mes { uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; - uint32_t sch_ctx_offs; - uint64_t sch_ctx_gpu_addr; - uint64_t *sch_ctx_ptr; - uint32_t query_status_fence_offs; - uint64_t query_status_fence_gpu_addr; - uint64_t *query_status_fence_ptr; + uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES]; + uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES]; uint32_t read_val_offs; uint64_t read_val_gpu_addr; uint32_t *read_val_ptr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index ad49cecb20b8..e6344a6b0a9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -212,6 +212,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) sched_hw_submission = max(sched_hw_submission, 256); + if (ring->funcs->type == AMDGPU_RING_TYPE_MES) + sched_hw_submission = 8; else if (ring == &adev->sdma.instance[0].page) sched_hw_submission = 256; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 1a5439abd1a0..c87d68d4be53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -461,8 +461,11 @@ struct amdgpu_vcn5_fw_shared { struct amdgpu_fw_shared_unified_queue_struct sq; uint8_t pad1[8]; struct amdgpu_fw_shared_fw_logging fw_log; + uint8_t pad2[20]; struct amdgpu_fw_shared_rb_setup rb_setup; - uint8_t pad2[4]; + struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; + struct amdgpu_fw_shared_drm_key_wa drm_key_wa; + uint8_t pad3[9]; }; #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 111c380f929b..b287a82e6177 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -858,7 +858,7 @@ void amdgpu_virt_post_reset(struct amdgpu_device *adev) adev->gfx.is_poweron = false; } - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 506fa8003388..2c611b8577a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3546,33 +3546,9 @@ static int gfx_v12_0_hw_init(void *handle) return r; } -static int gfx_v12_0_kiq_disable_kgq(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - int i, r = 0; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * - adev->gfx.num_gfx_rings)) - return -ENOMEM; - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], - PREEMPT_QUEUES, 0, 0); - - if (adev->gfx.kiq[0].ring.sched.ready) - r = amdgpu_ring_test_helper(kiq_ring); - - return r; -} - static int gfx_v12_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; uint32_t tmp; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); @@ -3580,8 +3556,7 @@ static int gfx_v12_0_hw_fini(void *handle) if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { - r = gfx_v12_0_kiq_disable_kgq(adev); - if (r) + if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index b88a6fa173b3..2797fd84432b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -231,7 +231,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid, GET_INST(GC, 0)); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index 26efce9aa410..edcb5351f8cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -299,7 +299,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 99adf3625657..98aa3ccd0d20 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -538,11 +538,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index ad524ddc9760..6ae5a784e187 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "jpeg_v4_0_3.h" @@ -782,11 +783,15 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -1084,6 +1089,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -1248,3 +1254,56 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) { adev->jpeg.ras = &jpeg_v4_0_3_ras; } + +/** + * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + uint32_t i, reg, res, cond, type; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res) /* only support 0 at the moment */ + return -EINVAL; + + switch (type) { + case PACKETJ_TYPE0: + if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE3: + if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] == CP_PACKETJ_NOP) + continue; + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + default: + dev_err(adev->dev, "Unknown packet type %d !\n", type); + return -EINVAL; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 747a3e5f6856..71c54b294e15 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -46,6 +46,9 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 +#define JPEG_REG_RANGE_START 0x4000 +#define JPEG_REG_RANGE_END 0x41c2 + extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, @@ -62,5 +65,7 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); - +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); #endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index d694a276498a..f4daff90c770 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -646,6 +646,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index f9343642ae7e..2ea8223eb969 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -162,13 +162,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -191,11 +191,18 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(&mes->ring_lock[0], flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -208,14 +215,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); op_str = mes_v11_0_get_op_string(x_pkt); misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); @@ -229,7 +235,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) @@ -252,8 +258,12 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -512,9 +522,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0]; mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; + mes->query_status_fence_gpu_addr[0]; for (i = 0; i < MAX_COMPUTE_PIPES; i++) mes_set_hw_res_pkt.compute_hqd_mask[i] = @@ -1015,7 +1025,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); return amdgpu_ring_test_helper(kiq_ring); } @@ -1029,7 +1039,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1071,7 +1081,7 @@ static int mes_v11_0_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; ring->funcs = &mes_v11_0_ring_funcs; @@ -1124,7 +1134,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1200,9 +1210,6 @@ static int mes_v11_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1216,12 +1223,12 @@ static int mes_v11_0_sw_fini(void *handle) &adev->gfx.kiq[0].ring.mqd_gpu_addr, &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, + &adev->mes.ring[0].mqd_gpu_addr, + &adev->mes.ring[0].mqd_ptr); amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + amdgpu_ring_fini(&adev->mes.ring[0]); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1332,9 +1339,9 @@ failure: static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v11_0_kiq_dequeue(&adev->mes.ring); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + mes_v11_0_kiq_dequeue(&adev->mes.ring[0]); + adev->mes.ring[0].sched.ready = false; } if (amdgpu_sriov_vf(adev)) { @@ -1352,7 +1359,7 @@ static int mes_v11_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; if (!adev->enable_mes_kiq) { @@ -1397,7 +1404,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 0713bc3eb263..e39a58d262c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -142,19 +142,20 @@ static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt) } static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size, - int api_status_off) + int pipe, void *pkt, int size, + int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[pipe]; + spinlock_t *ring_lock = &mes->ring_lock[pipe]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -177,11 +178,18 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(ring_lock, flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -194,39 +202,39 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(ring_lock, flags); op_str = mes_v12_0_get_op_string(x_pkt); misc_op_str = mes_v12_0_get_misc_op_string(x_pkt); if (misc_op_str) - dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, - misc_op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n", + pipe, op_str); else - dev_dbg(adev->dev, "MES msg=%d was emitted\n", - x_pkt->header.opcode); + dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n", + pipe, x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", - op_str, misc_op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s\n", - op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n", + pipe, op_str); else - dev_err(adev->dev, "MES failed to respond to msg=%d\n", - x_pkt->header.opcode); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n", + pipe, x_pkt->header.opcode); while (halt_if_hws_hang) schedule(); @@ -238,8 +246,12 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(ring_lock, flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -254,6 +266,8 @@ static int convert_to_mes_queue_type(int queue_type) return MES_QUEUE_TYPE_COMPUTE; else if (queue_type == AMDGPU_RING_TYPE_SDMA) return MES_QUEUE_TYPE_SDMA; + else if (queue_type == AMDGPU_RING_TYPE_MES) + return MES_QUEUE_TYPE_SCHQ; else BUG(); return -1; @@ -311,6 +325,7 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gds_size = input->queue_size; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -330,6 +345,7 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -338,6 +354,7 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { union MESAPI__ADD_QUEUE mes_add_queue_pkt; + int pipe; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); @@ -354,7 +371,12 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); mes_add_queue_pkt.map_legacy_kq = 1; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -363,6 +385,7 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, struct mes_unmap_legacy_queue_input *input) { union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + int pipe; memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); @@ -387,7 +410,12 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -404,7 +432,7 @@ static int mes_v12_0_resume_gang(struct amdgpu_mes *mes, return 0; } -static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) +static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; @@ -414,7 +442,7 @@ static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_status_pkt, sizeof(mes_status_pkt), offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); } @@ -423,6 +451,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, struct mes_misc_op_input *input) { union MESAPI__MISC misc_pkt; + int pipe; memset(&misc_pkt, 0, sizeof(misc_pkt)); @@ -475,12 +504,17 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, return -EINVAL; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &misc_pkt, sizeof(misc_pkt), offsetof(union MESAPI__MISC, api_status)); } -static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) { union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; @@ -491,12 +525,12 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } -static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) { int i; struct amdgpu_device *adev = mes->adev; @@ -508,27 +542,33 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; - mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; - mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; - mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; - mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; - - for (i = 0; i < MAX_COMPUTE_PIPES; i++) - mes_set_hw_res_pkt.compute_hqd_mask[i] = - mes->compute_hqd_mask[i]; - - for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; - - for (i = 0; i < MAX_SDMA_PIPES; i++) - mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; + if (pipe == AMDGPU_MES_SCHED_PIPE) { + mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; + mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; + mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; + mes_set_hw_res_pkt.paging_vmid = 0; + + for (i = 0; i < MAX_COMPUTE_PIPES; i++) + mes_set_hw_res_pkt.compute_hqd_mask[i] = + mes->compute_hqd_mask[i]; + + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; + + for (i = 0; i < MAX_SDMA_PIPES; i++) + mes_set_hw_res_pkt.sdma_hqd_mask[i] = + mes->sdma_hqd_mask[i]; + + for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) + mes_set_hw_res_pkt.aggregated_doorbells[i] = + mes->aggregated_doorbells[i]; + } - for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) - mes_set_hw_res_pkt.aggregated_doorbells[i] = - mes->aggregated_doorbells[i]; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = + mes->sch_ctx_gpu_addr[pipe]; + mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = + mes->query_status_fence_gpu_addr[pipe]; for (i = 0; i < 5; i++) { mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; @@ -556,7 +596,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } @@ -734,16 +774,11 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) if (enable) { data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - soc21_grbm_select(adev, 3, pipe, 0, 0); ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; @@ -757,8 +792,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) /* unhalt MES and activate pipe0 */ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); if (amdgpu_emu_mode) @@ -774,8 +808,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) data = REG_SET_FIELD(data, CP_MES_CNTL, MES_INVALIDATE_ICACHE, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); } @@ -790,10 +823,6 @@ static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev) mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - /* me=3, queue=0 */ soc21_grbm_select(adev, 3, pipe, 0, 0); @@ -1085,7 +1114,7 @@ static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); r = amdgpu_ring_test_ring(kiq_ring); if (r) { @@ -1101,14 +1130,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, struct amdgpu_ring *ring; int r; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; else - BUG(); + ring = &adev->mes.ring[pipe]; - if ((pipe == AMDGPU_MES_SCHED_PIPE) && + if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && (amdgpu_in_reset(adev) || adev->in_suspend)) { *(ring->wptr_cpu_addr) = 0; *(ring->rptr_cpu_addr) = 0; @@ -1120,13 +1147,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return r; if (pipe == AMDGPU_MES_SCHED_PIPE) { - if (adev->enable_uni_mes) { - mes_v12_0_queue_init_register(ring); - } else { + if (adev->enable_uni_mes) + r = amdgpu_mes_map_legacy_queue(adev, ring); + else r = mes_v12_0_kiq_enable_queue(adev); - if (r) - return r; - } + if (r) + return r; } else { mes_v12_0_queue_init_register(ring); } @@ -1146,25 +1172,29 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return 0; } -static int mes_v12_0_ring_init(struct amdgpu_device *adev) +static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[pipe]; ring->funcs = &mes_v12_0_ring_funcs; ring->me = 3; - ring->pipe = 0; + ring->pipe = pipe; ring->queue = 0; ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; - ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE]; + ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe]; ring->no_scheduler = true; sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue); + if (pipe == AMDGPU_MES_SCHED_PIPE) + ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; + else + ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1; + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); } @@ -1178,7 +1208,7 @@ static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev) ring = &adev->gfx.kiq[0].ring; ring->me = 3; - ring->pipe = adev->enable_uni_mes ? 0 : 1; + ring->pipe = 1; ring->queue = 0; ring->adev = NULL; @@ -1200,12 +1230,10 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, int r, mqd_size = sizeof(struct v12_compute_mqd); struct amdgpu_ring *ring; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; else - BUG(); + ring = &adev->mes.ring[pipe]; if (ring->mqd_obj) return 0; @@ -1246,9 +1274,6 @@ static int mes_v12_0_sw_init(void *handle) return r; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; - r = mes_v12_0_allocate_eop_buf(adev, pipe); if (r) return r; @@ -1256,18 +1281,15 @@ static int mes_v12_0_sw_init(void *handle) r = mes_v12_0_mqd_sw_init(adev, pipe); if (r) return r; - } - if (adev->enable_mes_kiq) { - r = mes_v12_0_kiq_ring_init(adev); + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) + r = mes_v12_0_kiq_ring_init(adev); + else + r = mes_v12_0_ring_init(adev, pipe); if (r) return r; } - r = mes_v12_0_ring_init(adev); - if (r) - return r; - return 0; } @@ -1276,9 +1298,6 @@ static int mes_v12_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1286,18 +1305,21 @@ static int mes_v12_0_sw_fini(void *handle) &adev->mes.eop_gpu_addr[pipe], NULL); amdgpu_ucode_release(&adev->mes.fw[pipe]); - } - - amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, - &adev->gfx.kiq[0].ring.mqd_gpu_addr, - &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { + amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj, + &adev->mes.ring[pipe].mqd_gpu_addr, + &adev->mes.ring[pipe].mqd_ptr); + amdgpu_ring_fini(&adev->mes.ring[pipe]); + } + } - amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + if (!adev->enable_uni_mes) { + amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, + &adev->gfx.kiq[0].ring.mqd_gpu_addr, + &adev->gfx.kiq[0].ring.mqd_ptr); + amdgpu_ring_fini(&adev->gfx.kiq[0].ring); + } if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1341,7 +1363,7 @@ static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) @@ -1362,10 +1384,10 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) { int r = 0; - mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); - if (adev->enable_uni_mes) - return mes_v12_0_hw_init(adev); + mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]); + else + mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { @@ -1392,6 +1414,14 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) if (r) goto failure; + if (adev->enable_uni_mes) { + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE); + if (r) + goto failure; + + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE); + } + r = mes_v12_0_hw_init(adev); if (r) goto failure; @@ -1405,9 +1435,15 @@ failure: static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v12_0_kiq_dequeue_sched(adev); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + if (adev->enable_uni_mes) + amdgpu_mes_unmap_legacy_queue(adev, + &adev->mes.ring[AMDGPU_MES_SCHED_PIPE], + RESET_QUEUES, 0, 0); + else + mes_v12_0_kiq_dequeue_sched(adev); + + adev->mes.ring[0].sched.ready = false; } mes_v12_0_enable(adev, false); @@ -1420,10 +1456,10 @@ static int mes_v12_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; - if (!adev->enable_mes_kiq || adev->enable_uni_mes) { + if (!adev->enable_mes_kiq) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, true); @@ -1443,23 +1479,23 @@ static int mes_v12_0_hw_init(void *handle) mes_v12_0_enable(adev, true); } + /* Enable the MES to handle doorbell ring on unmapped queue */ + mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); + r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; - r = mes_v12_0_set_hw_resources(&adev->mes); + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; if (adev->enable_uni_mes) - mes_v12_0_set_hw_resources_1(&adev->mes); + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); - /* Enable the MES to handle doorbell ring on unmapped queue */ - mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); - - r = mes_v12_0_query_sched_status(&adev->mes); + r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) { DRM_ERROR("MES is busy\n"); goto failure; @@ -1472,7 +1508,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; @@ -1515,17 +1551,7 @@ static int mes_v12_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe, r; - if (adev->enable_uni_mes) { - r = amdgpu_mes_init_microcode(adev, AMDGPU_MES_SCHED_PIPE); - if (!r) - return 0; - - adev->enable_uni_mes = false; - } - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; r = amdgpu_mes_init_microcode(adev, pipe); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 2357ff39323f..e74e1983da53 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -76,6 +76,12 @@ ((cond & 0xF) << 24) | \ ((type & 0xF) << 28)) +#define CP_PACKETJ_NOP 0x60000000 +#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF) +#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F) +#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF) +#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF) + /* Packet 3 types */ #define PACKET3_NOP 0x10 #define PACKET3_SET_BASE 0x11 diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index 7d641d0dadba..b0c3678cfb31 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -406,6 +406,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | @@ -424,6 +425,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7e7929f24ae4..983a977632ff 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2893,6 +2893,9 @@ static int dm_suspend(void *handle) hpd_rx_irq_work_suspend(dm); + if (adev->dm.dc->caps.ips_support) + dc_allow_idle_optimizations(adev->dm.dc, true); + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 915eb2c08ece..2e9f6da1acdc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -804,12 +804,25 @@ struct dsc_mst_fairness_params { }; #if defined(CONFIG_DRM_AMD_DC_FP) -static int kbps_to_peak_pbn(int kbps) +static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link) +{ + u8 link_coding_cap; + uint16_t fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B; + + link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(dc_link); + if (link_coding_cap == DP_128b_132b_ENCODING) + fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B; + + return fec_overhead_multiplier_x1000; +} + +static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000) { u64 peak_kbps = kbps; peak_kbps *= 1006; - peak_kbps = div_u64(peak_kbps, 1000); + peak_kbps *= fec_overhead_multiplier_x1000; + peak_kbps = div_u64(peak_kbps, 1000 * 1000); return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); } @@ -910,11 +923,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, int link_timeslots_used; int fair_pbn_alloc; int ret = 0; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { initial_slack[i] = - kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn; + kbps_to_peak_pbn(params[i].bw_range.max_kbps, fec_overhead_multiplier_x1000) - vars[i + k].pbn; bpp_increased[i] = false; remaining_to_increase += 1; } else { @@ -1010,6 +1024,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, int next_index; int remaining_to_try = 0; int ret; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -1039,7 +1054,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, if (next_index == -1) break; - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1052,8 +1067,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { - vars[next_index].pbn = kbps_to_peak_pbn( - params[next_index].bw_range.max_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1082,6 +1096,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, int count = 0; int i, k, ret; bool debugfs_overwrite = false; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); memset(params, 0, sizeof(params)); @@ -1146,7 +1161,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try no compression */ for (i = 0; i < count; i++) { vars[i + k].aconnector = params[i].aconnector; - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, @@ -1165,7 +1180,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try max compression */ for (i = 0; i < count; i++) { if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1173,7 +1188,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (ret < 0) return ret; } else { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index fa84d34b7373..600d6e221011 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -46,6 +46,9 @@ #define SYNAPTICS_CASCADED_HUB_ID 0x5A #define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0) +#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031 +#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000 + enum mst_msg_ready_type { NONE_MSG_RDY_EVENT = 0, DOWN_REP_MSG_RDY_EVENT = 1, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index ff03b1d98aa7..14a902ff3b8a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3589,7 +3589,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = temp_x + viewport_width; + pos_cpy.x = 2 * viewport_width - temp_x; } } } else { @@ -3682,7 +3682,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = 2 * viewport_width - temp_x; + pos_cpy.x = temp_x + viewport_width; } } } else { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 9a3cc0514a36..8e0588b1cf30 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1778,6 +1778,9 @@ static bool dcn321_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; /* read VBIOS LTTPR caps */ diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index 4cf2c9f30b3d..101e2fe962c6 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -97,6 +97,7 @@ enum MES_QUEUE_TYPE { MES_QUEUE_TYPE_SDMA, MES_QUEUE_TYPE_MAX, + MES_QUEUE_TYPE_SCHQ = MES_QUEUE_TYPE_MAX, }; struct MES_API_STATUS { @@ -242,8 +243,12 @@ union MESAPI_SET_HW_RESOURCES { uint32_t send_write_data : 1; uint32_t os_tdr_timeout_override : 1; uint32_t use_rs64mem_for_proc_gang_ctx : 1; + uint32_t halt_on_misaligned_access : 1; + uint32_t use_add_queue_unmap_flag_addr : 1; + uint32_t enable_mes_sch_stb_log : 1; + uint32_t limit_single_process : 1; uint32_t unmapped_doorbell_handling: 2; - uint32_t reserved : 15; + uint32_t reserved : 11; }; uint32_t uint32_all; }; diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index c16c7678237e..0830cae9a4d0 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -208,6 +208,18 @@ static const struct dmi_system_id orientation_data[] = { DMI_MATCH(DMI_BOARD_NAME, "KUN"), }, .driver_data = (void *)&lcd1600x2560_rightside_up, + }, { /* AYN Loki Max */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Max"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, + }, { /* AYN Loki Zero */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Zero"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, }, { /* Chuwi HiBook (CWI514) */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"), diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index ae5c6ec24a1e..77b50c56c124 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -539,8 +539,8 @@ static int mtk_drm_kms_init(struct drm_device *drm) } /* IGT will check if the cursor size is configured */ - drm->mode_config.cursor_width = drm->mode_config.max_width; - drm->mode_config.cursor_height = drm->mode_config.max_height; + drm->mode_config.cursor_width = 512; + drm->mode_config.cursor_height = 512; /* Use OVL device for all DMA memory allocations */ crtc = drm_crtc_from_index(drm, 0); diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index 2241e53a2946..dec6913cec5b 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -279,7 +279,6 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector, const u8 *buffer, size_t len) { struct inno_hdmi *hdmi = connector_to_inno_hdmi(connector); - u8 packed_frame[HDMI_MAXIMUM_INFO_FRAME_SIZE]; ssize_t i; if (type != HDMI_INFOFRAME_TYPE_AVI) { @@ -291,8 +290,7 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector, inno_hdmi_disable_frame(connector, type); for (i = 0; i < len; i++) - hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, - packed_frame[i]); + hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, buffer[i]); return 0; } diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 9bd7453b25ad..b8682818bafa 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -315,7 +315,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) struct v3d_dev *v3d = job->base.v3d; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; - int i, csd_cfg0_reg, csd_cfg_reg_count; + int i, csd_cfg0_reg; v3d->csd_job = job; @@ -335,9 +335,17 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) v3d_switch_perfmon(v3d, &job->base); csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver); - csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7; - for (i = 1; i <= csd_cfg_reg_count; i++) + for (i = 1; i <= 6; i++) V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]); + + /* Although V3D 7.1 has an eighth configuration register, we are not + * using it. Therefore, make sure it remains unused. + * + * XXX: Set the CFG7 register + */ + if (v3d->ver >= 71) + V3D_CORE_WRITE(0, V3D_V7_CSD_QUEUED_CFG7, 0); + /* CFG0 write kicks off the job. */ V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 76109415eba6..f2f1d8ddb221 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -87,9 +87,55 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) spin_unlock(&xe->clients.lock); file->driver_priv = xef; + kref_init(&xef->refcount); + return 0; } +static void xe_file_destroy(struct kref *ref) +{ + struct xe_file *xef = container_of(ref, struct xe_file, refcount); + struct xe_device *xe = xef->xe; + + xa_destroy(&xef->exec_queue.xa); + mutex_destroy(&xef->exec_queue.lock); + xa_destroy(&xef->vm.xa); + mutex_destroy(&xef->vm.lock); + + spin_lock(&xe->clients.lock); + xe->clients.count--; + spin_unlock(&xe->clients.lock); + + xe_drm_client_put(xef->client); + kfree(xef); +} + +/** + * xe_file_get() - Take a reference to the xe file object + * @xef: Pointer to the xe file + * + * Anyone with a pointer to xef must take a reference to the xe file + * object using this call. + * + * Return: xe file pointer + */ +struct xe_file *xe_file_get(struct xe_file *xef) +{ + kref_get(&xef->refcount); + return xef; +} + +/** + * xe_file_put() - Drop a reference to the xe file object + * @xef: Pointer to the xe file + * + * Used to drop reference to the xef object + */ +void xe_file_put(struct xe_file *xef) +{ + kref_put(&xef->refcount, xe_file_destroy); +} + static void xe_file_close(struct drm_device *dev, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -98,6 +144,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) struct xe_exec_queue *q; unsigned long idx; + xe_pm_runtime_get(xe); + /* * No need for exec_queue.lock here as there is no contention for it * when FD is closing as IOCTLs presumably can't be modifying the @@ -108,21 +156,14 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xe_exec_queue_kill(q); xe_exec_queue_put(q); } - xa_destroy(&xef->exec_queue.xa); - mutex_destroy(&xef->exec_queue.lock); mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); mutex_unlock(&xef->vm.lock); - xa_destroy(&xef->vm.xa); - mutex_destroy(&xef->vm.lock); - spin_lock(&xe->clients.lock); - xe->clients.count--; - spin_unlock(&xe->clients.lock); + xe_file_put(xef); - xe_drm_client_put(xef->client); - kfree(xef); + xe_pm_runtime_put(xe); } static const struct drm_ioctl_desc xe_ioctls[] = { diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index bb07f5669dbb..b3952718b3c1 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -170,4 +170,7 @@ static inline bool xe_device_wedged(struct xe_device *xe) void xe_device_declare_wedged(struct xe_device *xe); +struct xe_file *xe_file_get(struct xe_file *xef); +void xe_file_put(struct xe_file *xef); + #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 3bca6d344744..cbc582bcc90a 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -566,6 +566,9 @@ struct xe_file { /** @client: drm client */ struct xe_drm_client *client; + + /** @refcount: ref count of this xe file */ + struct kref refcount; }; #endif diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 6a26923fa10e..7ddd59908334 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -251,11 +251,8 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) /* Accumulate all the exec queues from this client */ mutex_lock(&xef->exec_queue.lock); - xa_for_each(&xef->exec_queue.xa, i, q) { + xa_for_each(&xef->exec_queue.xa, i, q) xe_exec_queue_update_run_ticks(q); - xef->run_ticks[q->class] += q->run_ticks - q->old_run_ticks; - q->old_run_ticks = q->run_ticks; - } mutex_unlock(&xef->exec_queue.lock); /* Get the total GPU cycles */ diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 0ba37835849b..a39384bb9553 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -37,6 +37,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) { if (q->vm) xe_vm_put(q->vm); + + if (q->xef) + xe_file_put(q->xef); + kfree(q); } @@ -649,6 +653,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, goto kill_exec_queue; args->exec_queue_id = id; + q->xef = xe_file_get(xef); return 0; @@ -762,6 +767,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q) */ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { + struct xe_file *xef; struct xe_lrc *lrc; u32 old_ts, new_ts; @@ -773,6 +779,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) if (!q->vm || !q->vm->xef) return; + xef = q->vm->xef; + /* * Only sample the first LRC. For parallel submission, all of them are * scheduled together and we compensate that below by multiplying by @@ -783,7 +791,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) */ lrc = q->lrc[0]; new_ts = xe_lrc_update_timestamp(lrc, &old_ts); - q->run_ticks += (new_ts - old_ts) * q->width; + xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; } void xe_exec_queue_kill(struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 201588ec33c3..a35ce24c9798 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -38,6 +38,9 @@ enum xe_exec_queue_priority { * a kernel object. */ struct xe_exec_queue { + /** @xef: Back pointer to xe file if this is user created exec queue */ + struct xe_file *xef; + /** @gt: graphics tile this exec queue can submit to */ struct xe_gt *gt; /** @@ -139,10 +142,6 @@ struct xe_exec_queue { * Protected by @vm's resv. Unused if @vm == NULL. */ u64 tlb_flush_seqno; - /** @old_run_ticks: prior hw engine class run time in ticks for this exec queue */ - u64 old_run_ticks; - /** @run_ticks: hw engine class run time in ticks for this exec queue */ - u64 run_ticks; /** @lrc: logical ring context for this exec queue */ struct xe_lrc *lrc[]; }; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 4699b7836001..b6f0a7299c03 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1927,6 +1927,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); + bool is_primary = !xe_gt_is_media_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; @@ -1935,13 +1936,17 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_dbs = pf_get_vf_config_dbs(gt, vfid); /* note that GuC doorbells are optional */ - valid_any = valid_ggtt || valid_ctxs || valid_dbs; - valid_all = valid_ggtt && valid_ctxs; + valid_any = valid_ctxs || valid_dbs; + valid_all = valid_ctxs; + + /* and GGTT/LMEM is configured on primary GT only */ + valid_all = valid_all && valid_ggtt; + valid_any = valid_any || (valid_ggtt && is_primary); if (IS_DGFX(xe)) { bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid); - valid_any = valid_any || valid_lmem; + valid_any = valid_any || (valid_lmem && is_primary); valid_all = valid_all && valid_lmem; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 41e46a00c01e..8892d6c2291e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -850,7 +850,7 @@ static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr) xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - return bsearch(&key, runtime->regs, runtime->regs_size, sizeof(key), + return bsearch(&key, runtime->regs, runtime->num_regs, sizeof(key), vf_runtime_reg_cmp); } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index d9359976ab8b..481d83d07367 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -13,10 +13,13 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_mmio.h" +#include "xe_pm.h" #include "xe_sriov.h" #include "xe_trace.h" #include "regs/xe_guc_regs.h" +#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS + /* * TLB inval depends on pending commands in the CT queue and then the real * invalidation time. Double up the time to process full CT queue @@ -33,6 +36,24 @@ static long tlb_timeout_jiffies(struct xe_gt *gt) return hw_tlb_timeout + 2 * delay; } +static void +__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) +{ + bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); + + trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); + xe_gt_tlb_invalidation_fence_fini(fence); + dma_fence_signal(&fence->base); + if (!stack) + dma_fence_put(&fence->base); +} + +static void +invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) +{ + list_del(&fence->link); + __invalidation_fence_signal(xe, fence); +} static void xe_gt_tlb_fence_timeout(struct work_struct *work) { @@ -54,10 +75,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", fence->seqno, gt->tlb_invalidation.seqno_recv); - list_del(&fence->link); fence->base.error = -ETIME; - dma_fence_signal(&fence->base); - dma_fence_put(&fence->base); + invalidation_fence_signal(xe, fence); } if (!list_empty(>->tlb_invalidation.pending_fences)) queue_delayed_work(system_wq, @@ -87,21 +106,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) return 0; } -static void -__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); - dma_fence_signal(&fence->base); - dma_fence_put(&fence->base); -} - -static void -invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - list_del(&fence->link); - __invalidation_fence_signal(xe, fence); -} - /** * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset * @gt: graphics tile @@ -111,7 +115,6 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) { struct xe_gt_tlb_invalidation_fence *fence, *next; - struct xe_guc *guc = >->uc.guc; int pending_seqno; /* @@ -134,7 +137,6 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) else pending_seqno = gt->tlb_invalidation.seqno - 1; WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno); - wake_up_all(&guc->ct.wq); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) @@ -165,6 +167,8 @@ static int send_tlb_invalidation(struct xe_guc *guc, int seqno; int ret; + xe_gt_assert(gt, fence); + /* * XXX: The seqno algorithm relies on TLB invalidation being processed * in order which they currently are, if that changes the algorithm will @@ -173,10 +177,8 @@ static int send_tlb_invalidation(struct xe_guc *guc, mutex_lock(&guc->ct.lock); seqno = gt->tlb_invalidation.seqno; - if (fence) { - fence->seqno = seqno; - trace_xe_gt_tlb_invalidation_fence_send(xe, fence); - } + fence->seqno = seqno; + trace_xe_gt_tlb_invalidation_fence_send(xe, fence); action[1] = seqno; ret = xe_guc_ct_send_locked(&guc->ct, action, len, G2H_LEN_DW_TLB_INVALIDATE, 1); @@ -209,7 +211,6 @@ static int send_tlb_invalidation(struct xe_guc *guc, TLB_INVALIDATION_SEQNO_MAX; if (!gt->tlb_invalidation.seqno) gt->tlb_invalidation.seqno = 1; - ret = seqno; } mutex_unlock(&guc->ct.lock); @@ -223,14 +224,16 @@ static int send_tlb_invalidation(struct xe_guc *guc, /** * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC * @gt: graphics tile + * @fence: invalidation fence which will be signal on TLB invalidation + * completion * * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and - * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion. + * caller can use the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: 0 on success, negative error code on error */ -static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) +static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence) { u32 action[] = { XE_GUC_ACTION_TLB_INVALIDATION, @@ -238,7 +241,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), }; - return send_tlb_invalidation(>->uc.guc, NULL, action, + return send_tlb_invalidation(>->uc.guc, fence, action, ARRAY_SIZE(action)); } @@ -257,13 +260,17 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) if (xe_guc_ct_enabled(>->uc.guc.ct) && gt->uc.guc.submission_state.enabled) { - int seqno; - - seqno = xe_gt_tlb_invalidation_guc(gt); - if (seqno <= 0) - return seqno; + struct xe_gt_tlb_invalidation_fence fence; + int ret; + + xe_gt_tlb_invalidation_fence_init(gt, &fence, true); + ret = xe_gt_tlb_invalidation_guc(gt, &fence); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence); + return ret; + } - xe_gt_tlb_invalidation_wait(gt, seqno); + xe_gt_tlb_invalidation_fence_wait(&fence); } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { if (IS_SRIOV_VF(xe)) return 0; @@ -290,18 +297,16 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) * * @gt: graphics tile * @fence: invalidation fence which will be signal on TLB invalidation - * completion, can be NULL + * completion * @start: start address * @end: end address * @asid: address space id * * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can either use - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for - * completion. + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: Negative error code on error, 0 on success */ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, @@ -312,11 +317,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, u32 action[MAX_TLB_INVALIDATION_LEN]; int len = 0; + xe_gt_assert(gt, fence); + /* Execlists not supported */ if (gt_to_xe(gt)->info.force_execlist) { - if (fence) - __invalidation_fence_signal(xe, fence); - + __invalidation_fence_signal(xe, fence); return 0; } @@ -382,12 +387,10 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, * @vma: VMA to invalidate * * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can either use - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for - * completion. + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: Negative error code on error, 0 on success */ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, @@ -401,43 +404,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, } /** - * xe_gt_tlb_invalidation_wait - Wait for TLB to complete - * @gt: graphics tile - * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation - * - * Wait for tlb_timeout_jiffies() for a TLB invalidation to complete. - * - * Return: 0 on success, -ETIME on TLB invalidation timeout - */ -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) -{ - struct xe_guc *guc = >->uc.guc; - int ret; - - /* Execlists not supported */ - if (gt_to_xe(gt)->info.force_execlist) - return 0; - - /* - * XXX: See above, this algorithm only works if seqno are always in - * order - */ - ret = wait_event_timeout(guc->ct.wq, - tlb_invalidation_seqno_past(gt, seqno), - tlb_timeout_jiffies(gt)); - if (!ret) { - struct drm_printer p = xe_gt_err_printer(gt); - - xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n", - seqno, gt->tlb_invalidation.seqno_recv); - xe_guc_ct_print(&guc->ct, &p, true); - return -ETIME; - } - - return 0; -} - -/** * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler * @guc: guc * @msg: message indicating TLB invalidation done @@ -480,12 +446,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return 0; } - /* - * wake_up_all() and wait_event_timeout() already have the correct - * barriers. - */ WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]); - wake_up_all(&guc->ct.wq); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) { @@ -508,3 +469,59 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return 0; } + +static const char * +invalidation_fence_get_driver_name(struct dma_fence *dma_fence) +{ + return "xe"; +} + +static const char * +invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + return "invalidation_fence"; +} + +static const struct dma_fence_ops invalidation_fence_ops = { + .get_driver_name = invalidation_fence_get_driver_name, + .get_timeline_name = invalidation_fence_get_timeline_name, +}; + +/** + * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence + * @gt: GT + * @fence: TLB invalidation fence to initialize + * @stack: fence is stack variable + * + * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini + * must be called if fence is not signaled. + */ +void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + bool stack) +{ + xe_pm_runtime_get_noresume(gt_to_xe(gt)); + + spin_lock_irq(>->tlb_invalidation.lock); + dma_fence_init(&fence->base, &invalidation_fence_ops, + >->tlb_invalidation.lock, + dma_fence_context_alloc(1), 1); + spin_unlock_irq(>->tlb_invalidation.lock); + INIT_LIST_HEAD(&fence->link); + if (stack) + set_bit(FENCE_STACK_BIT, &fence->base.flags); + else + dma_fence_get(&fence->base); + fence->gt = gt; +} + +/** + * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence + * @fence: TLB invalidation fence to finalize + * + * Drop PM ref which fence took durinig init. + */ +void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence) +{ + xe_pm_runtime_put(gt_to_xe(fence->gt)); +} diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index bf3bebd9f985..a84065fa324c 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -23,7 +23,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, u64 start, u64 end, u32 asid); -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + bool stack); +void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence); + +static inline void +xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) +{ + dma_fence_wait(&fence->base, false); +} + #endif /* _XE_GT_TLB_INVALIDATION_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h index 934c828efe31..de6e825e0851 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h @@ -8,6 +8,8 @@ #include <linux/dma-fence.h> +struct xe_gt; + /** * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence * @@ -17,6 +19,8 @@ struct xe_gt_tlb_invalidation_fence { /** @base: dma fence base */ struct dma_fence base; + /** @gt: GT which fence belong to */ + struct xe_gt *gt; /** @link: link into list of pending tlb fences */ struct list_head link; /** @seqno: seqno of TLB invalidation to signal fence one */ diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 7d2e937da1d8..64afc90ad2c5 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -327,6 +327,8 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 || state == XE_GUC_CT_STATE_STOPPED); + if (ct->g2h_outstanding) + xe_pm_runtime_put(ct_to_xe(ct)); ct->g2h_outstanding = 0; ct->state = state; @@ -495,10 +497,15 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) { xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space); + xe_gt_assert(ct_to_gt(ct), (!g2h_len && !num_g2h) || + (g2h_len && num_g2h)); if (g2h_len) { lockdep_assert_held(&ct->fast_lock); + if (!ct->g2h_outstanding) + xe_pm_runtime_get_noresume(ct_to_xe(ct)); + ct->ctbs.g2h.info.space -= g2h_len; ct->g2h_outstanding += num_g2h; } @@ -511,7 +518,8 @@ static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); ct->ctbs.g2h.info.space += g2h_len; - --ct->g2h_outstanding; + if (!--ct->g2h_outstanding) + xe_pm_runtime_put(ct_to_xe(ct)); } static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 8d7e7f4bbff7..6398629e6b4e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1393,6 +1393,8 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) default: XE_WARN_ON("Unknown message type"); } + + xe_pm_runtime_put(guc_to_xe(exec_queue_to_guc(msg->private_data))); } static const struct drm_sched_backend_ops drm_sched_ops = { @@ -1482,6 +1484,8 @@ static void guc_exec_queue_kill(struct xe_exec_queue *q) static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, u32 opcode) { + xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); + INIT_LIST_HEAD(&msg->link); msg->opcode = opcode; msg->private_data = q; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ade9e7a3a0ad..31a751a5de3f 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1115,23 +1115,6 @@ struct invalidation_fence { u32 asid; }; -static const char * -invalidation_fence_get_driver_name(struct dma_fence *dma_fence) -{ - return "xe"; -} - -static const char * -invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) -{ - return "invalidation_fence"; -} - -static const struct dma_fence_ops invalidation_fence_ops = { - .get_driver_name = invalidation_fence_get_driver_name, - .get_timeline_name = invalidation_fence_get_timeline_name, -}; - static void invalidation_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { @@ -1170,15 +1153,8 @@ static int invalidation_fence_init(struct xe_gt *gt, trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - spin_lock_irq(>->tlb_invalidation.lock); - dma_fence_init(&ifence->base.base, &invalidation_fence_ops, - >->tlb_invalidation.lock, - dma_fence_context_alloc(1), 1); - spin_unlock_irq(>->tlb_invalidation.lock); - - INIT_LIST_HEAD(&ifence->base.link); + xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - dma_fence_get(&ifence->base.base); /* Ref for caller */ ifence->fence = fence; ifence->gt = gt; ifence->start = start; diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index c4e018aa2982..e8d31e010860 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -53,14 +53,18 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, u64 value) { struct xe_user_fence *ufence; + u64 __user *ptr = u64_to_user_ptr(addr); + + if (!access_ok(ptr, sizeof(ptr))) + return ERR_PTR(-EFAULT); ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) - return NULL; + return ERR_PTR(-ENOMEM); ufence->xe = xe; kref_init(&ufence->refcount); - ufence->addr = u64_to_user_ptr(addr); + ufence->addr = ptr; ufence->value = value; ufence->mm = current->mm; mmgrab(ufence->mm); @@ -183,8 +187,8 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, } else { sync->ufence = user_fence_create(xe, sync_in.addr, sync_in.timeline_value); - if (XE_IOCTL_DBG(xe, !sync->ufence)) - return -ENOMEM; + if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) + return PTR_ERR(sync->ufence); } break; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5b166fa03684..c7561a56abaf 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1601,6 +1601,10 @@ static void vm_destroy_work_func(struct work_struct *w) XE_WARN_ON(vm->pt_root[id]); trace_xe_vm_free(vm); + + if (vm->xef) + xe_file_put(vm->xef); + kfree(vm); } @@ -1916,7 +1920,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, } args->vm_id = id; - vm->xef = xef; + vm->xef = xe_file_get(xef); /* Record BO memory for VM pagetable created against client */ for_each_tile(tile, xe, id) @@ -3337,10 +3341,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; struct xe_tile *tile; + struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE]; u32 tile_needs_invalidate = 0; - int seqno[XE_MAX_TILES_PER_DEVICE]; u8 id; - int ret; + int ret = 0; xe_assert(xe, !xe_vma_is_null(vma)); trace_xe_vma_invalidate(vma); @@ -3365,29 +3369,33 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) for_each_tile(tile, xe, id) { if (xe_pt_zap_ptes(tile, vma)) { - tile_needs_invalidate |= BIT(id); xe_device_wmb(xe); + xe_gt_tlb_invalidation_fence_init(tile->primary_gt, + &fence[id], true); + /* * FIXME: We potentially need to invalidate multiple * GTs within the tile */ - seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma); - if (seqno[id] < 0) - return seqno[id]; - } - } + ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, + &fence[id], vma); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence[id]); + goto wait; + } - for_each_tile(tile, xe, id) { - if (tile_needs_invalidate & BIT(id)) { - ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]); - if (ret < 0) - return ret; + tile_needs_invalidate |= BIT(id); } } +wait: + for_each_tile(tile, xe, id) + if (tile_needs_invalidate & BIT(id)) + xe_gt_tlb_invalidation_fence_wait(&fence[id]); + vma->tile_invalidated = vma->tile_mask; - return 0; + return ret; } struct xe_vm_snapshot { diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c index 705b52337068..81f3024b7b1b 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c @@ -171,11 +171,13 @@ err_hid_data: void amdtp_hid_remove(struct amdtp_cl_data *cli_data) { int i; + struct amdtp_hid_data *hid_data; for (i = 0; i < cli_data->num_hid_devices; ++i) { if (cli_data->hid_sensor_hubs[i]) { - kfree(cli_data->hid_sensor_hubs[i]->driver_data); + hid_data = cli_data->hid_sensor_hubs[i]->driver_data; hid_destroy_device(cli_data->hid_sensor_hubs[i]); + kfree(hid_data); cli_data->hid_sensor_hubs[i] = NULL; } } diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 37e6d25593c2..a282388b7aa5 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1249,6 +1249,9 @@ static const struct hid_device_id asus_devices[] = { USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY), QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X), + QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD), QUIRK_ROG_CLAYMORE_II_KEYBOARD }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, diff --git a/drivers/hid/hid-cougar.c b/drivers/hid/hid-cougar.c index cb8bd8aae15b..0fa785f52707 100644 --- a/drivers/hid/hid-cougar.c +++ b/drivers/hid/hid-cougar.c @@ -106,7 +106,7 @@ static void cougar_fix_g6_mapping(void) static __u8 *cougar_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - if (rdesc[2] == 0x09 && rdesc[3] == 0x02 && + if (*rsize >= 117 && rdesc[2] == 0x09 && rdesc[3] == 0x02 && (rdesc[115] | rdesc[116] << 8) >= HID_MAX_USAGES) { hid_info(hdev, "usage count exceeds max: fixing up report descriptor\n"); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 72d56ee7ce1b..781c5aa29859 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -210,6 +210,7 @@ #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD3 0x1a30 #define USB_DEVICE_ID_ASUSTEK_ROG_Z13_LIGHTBAR 0x18c6 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY 0x1abe +#define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b #define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD 0x1869 @@ -520,6 +521,8 @@ #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100 0xe100 #define I2C_VENDOR_ID_GOODIX 0x27c6 +#define I2C_DEVICE_ID_GOODIX_01E8 0x01e8 +#define I2C_DEVICE_ID_GOODIX_01E9 0x01e9 #define I2C_DEVICE_ID_GOODIX_01F0 0x01f0 #define USB_VENDOR_ID_GOODTOUCH 0x1aad diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 56fc78841f24..99812c0f830b 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1441,6 +1441,30 @@ static int mt_event(struct hid_device *hid, struct hid_field *field, return 0; } +static __u8 *mt_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *size) +{ + if (hdev->vendor == I2C_VENDOR_ID_GOODIX && + (hdev->product == I2C_DEVICE_ID_GOODIX_01E8 || + hdev->product == I2C_DEVICE_ID_GOODIX_01E9)) { + if (rdesc[607] == 0x15) { + rdesc[607] = 0x25; + dev_info( + &hdev->dev, + "GT7868Q report descriptor fixup is applied.\n"); + } else { + dev_info( + &hdev->dev, + "The byte is not expected for fixing the report descriptor. \ +It's possible that the touchpad firmware is not suitable for applying the fix. \ +got: %x\n", + rdesc[607]); + } + } + + return rdesc; +} + static void mt_report(struct hid_device *hid, struct hid_report *report) { struct mt_device *td = hid_get_drvdata(hid); @@ -2035,6 +2059,14 @@ static const struct hid_device_id mt_devices[] = { MT_BT_DEVICE(USB_VENDOR_ID_FRUCTEL, USB_DEVICE_ID_GAMETEL_MT_MODE) }, + /* Goodix GT7868Q devices */ + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, + I2C_DEVICE_ID_GOODIX_01E8) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, + I2C_DEVICE_ID_GOODIX_01E8) }, + /* GoodTouch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_GOODTOUCH, @@ -2270,6 +2302,7 @@ static struct hid_driver mt_driver = { .feature_mapping = mt_feature_mapping, .usage_table = mt_grabbed_usages, .event = mt_event, + .report_fixup = mt_report_fixup, .report = mt_report, .suspend = pm_ptr(mt_suspend), .reset_resume = pm_ptr(mt_reset_resume), diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 1f4564982b95..2541fa2e0fa3 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1878,12 +1878,14 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage, int fmax = field->logical_maximum; unsigned int equivalent_usage = wacom_equivalent_usage(usage->hid); int resolution_code = code; - int resolution = hidinput_calc_abs_res(field, resolution_code); + int resolution; if (equivalent_usage == HID_DG_TWIST) { resolution_code = ABS_RZ; } + resolution = hidinput_calc_abs_res(field, resolution_code); + if (equivalent_usage == HID_GD_X) { fmin += features->offset_left; fmax -= features->offset_right; diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 365e37bba0f3..06e836e3e877 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -986,8 +986,10 @@ static int __maybe_unused geni_i2c_runtime_resume(struct device *dev) return ret; ret = clk_prepare_enable(gi2c->core_clk); - if (ret) + if (ret) { + geni_icc_disable(&gi2c->se); return ret; + } ret = geni_se_resources_on(&gi2c->se); if (ret) { diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 85b31edc558d..1df5b4204142 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1802,9 +1802,9 @@ static int tegra_i2c_probe(struct platform_device *pdev) * domain. * * VI I2C device shouldn't be marked as IRQ-safe because VI I2C won't - * be used for atomic transfers. + * be used for atomic transfers. ACPI device is not IRQ safe also. */ - if (!IS_VI(i2c_dev)) + if (!IS_VI(i2c_dev) && !has_acpi_companion(i2c_dev->dev)) pm_runtime_irq_safe(i2c_dev->dev); pm_runtime_enable(i2c_dev->dev); diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index cd679c13752e..81e9cc6e3164 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -170,6 +170,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) report_partial_fault(iopf_param, fault); iopf_put_dev_fault_param(iopf_param); /* A request that is not the last does not need to be ack'd */ + return; } /* diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 9a7ec5997c61..3214a4c17c6b 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -526,7 +526,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, err_unresv: if (hwpt_is_paging(hwpt)) iommufd_group_remove_reserved_iova(igroup, - to_hwpt_paging(old_hwpt)); + to_hwpt_paging(hwpt)); err_unlock: mutex_unlock(&idev->igroup->lock); return ERR_PTR(rc); diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index f95e32e29133..222cfc11ebfd 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -273,7 +273,7 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, return 0; } -const struct iommu_dirty_ops dirty_ops = { +static const struct iommu_dirty_ops dirty_ops = { .set_dirty_tracking = mock_domain_set_dirty_tracking, .read_and_clear_dirty = mock_domain_read_and_clear_dirty, }; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index c2c07bfa6471..f299ff393a6a 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1181,8 +1181,26 @@ static int do_resume(struct dm_ioctl *param) suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; if (param->flags & DM_NOFLUSH_FLAG) suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG; - if (!dm_suspended_md(md)) - dm_suspend(md, suspend_flags); + if (!dm_suspended_md(md)) { + r = dm_suspend(md, suspend_flags); + if (r) { + down_write(&_hash_lock); + hc = dm_get_mdptr(md); + if (hc && !hc->new_map) { + hc->new_map = new_map; + new_map = NULL; + } else { + r = -ENXIO; + } + up_write(&_hash_lock); + if (new_map) { + dm_sync_table(md); + dm_table_destroy(new_map); + } + dm_put(md); + return r; + } + } old_size = dm_get_size(md); old_map = dm_swap_table(md, new_map); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 97fab2087df8..87bb90303435 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2737,7 +2737,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta break; if (signal_pending_state(task_state, current)) { - r = -EINTR; + r = -ERESTARTSYS; break; } @@ -2762,7 +2762,7 @@ static int dm_wait_for_completion(struct mapped_device *md, unsigned int task_st break; if (signal_pending_state(task_state, current)) { - r = -EINTR; + r = -ERESTARTSYS; break; } diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 04698fd03e60..d48c4fafc779 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -277,7 +277,7 @@ static void sm_metadata_destroy(struct dm_space_map *sm) { struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - kfree(smm); + kvfree(smm); } static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) @@ -772,7 +772,7 @@ struct dm_space_map *dm_sm_metadata_init(void) { struct sm_metadata *smm; - smm = kmalloc(sizeof(*smm), GFP_KERNEL); + smm = kvmalloc(sizeof(*smm), GFP_KERNEL); if (!smm) return ERR_PTR(-ENOMEM); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7acfe7c9dc8d..761989d67906 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -617,6 +617,12 @@ static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio, return -1; } +static bool rdev_in_recovery(struct md_rdev *rdev, struct r1bio *r1_bio) +{ + return !test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < r1_bio->sector + r1_bio->sectors; +} + static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors) { @@ -635,6 +641,7 @@ static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, rdev = conf->mirrors[disk].rdev; if (!rdev || test_bit(Faulty, &rdev->flags) || + rdev_in_recovery(rdev, r1_bio) || test_bit(WriteMostly, &rdev->flags)) continue; @@ -673,7 +680,8 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio, rdev = conf->mirrors[disk].rdev; if (!rdev || test_bit(Faulty, &rdev->flags) || - !test_bit(WriteMostly, &rdev->flags)) + !test_bit(WriteMostly, &rdev->flags) || + rdev_in_recovery(rdev, r1_bio)) continue; /* there are no bad blocks, we can use this disk */ @@ -733,9 +741,7 @@ static bool rdev_readable(struct md_rdev *rdev, struct r1bio *r1_bio) if (!rdev || test_bit(Faulty, &rdev->flags)) return false; - /* still in recovery */ - if (!test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < r1_bio->sector + r1_bio->sectors) + if (rdev_in_recovery(rdev, r1_bio)) return false; /* don't read from slow disk unless have to */ diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 5204fda51da3..339d126414d4 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -2085,16 +2085,6 @@ err_invoke: return err; } -static int is_attach_rejected(struct fastrpc_user *fl) -{ - /* Check if the device node is non-secure */ - if (!fl->is_secure_dev) { - dev_dbg(&fl->cctx->rpdev->dev, "untrusted app trying to attach to privileged DSP PD\n"); - return -EACCES; - } - return 0; -} - static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2107,19 +2097,13 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, err = fastrpc_invoke(fl, argp); break; case FASTRPC_IOCTL_INIT_ATTACH: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_attach(fl, ROOT_PD); + err = fastrpc_init_attach(fl, ROOT_PD); break; case FASTRPC_IOCTL_INIT_ATTACH_SNS: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_attach(fl, SENSORS_PD); + err = fastrpc_init_attach(fl, SENSORS_PD); break; case FASTRPC_IOCTL_INIT_CREATE_STATIC: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_create_static_process(fl, argp); + err = fastrpc_init_create_static_process(fl, argp); break; case FASTRPC_IOCTL_INIT_CREATE: err = fastrpc_init_create_process(fl, argp); diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c index 5cd488f54cfa..8f744bee6fbd 100644 --- a/drivers/misc/lkdtm/refcount.c +++ b/drivers/misc/lkdtm/refcount.c @@ -182,6 +182,21 @@ static void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void) check_negative(&neg, 3); } +/* + * A refcount_sub_and_test() by zero when the counter is at zero should act like + * refcount_sub_and_test() above when going negative. + */ +static void lkdtm_REFCOUNT_SUB_AND_TEST_ZERO(void) +{ + refcount_t neg = REFCOUNT_INIT(0); + + pr_info("attempting bad refcount_sub_and_test() at zero\n"); + if (refcount_sub_and_test(0, &neg)) + pr_warn("Weird: refcount_sub_and_test() reported zero\n"); + + check_negative(&neg, 0); +} + static void check_from_zero(refcount_t *ref) { switch (refcount_read(ref)) { @@ -400,6 +415,7 @@ static struct crashtype crashtypes[] = { CRASHTYPE(REFCOUNT_DEC_NEGATIVE), CRASHTYPE(REFCOUNT_DEC_AND_TEST_NEGATIVE), CRASHTYPE(REFCOUNT_SUB_AND_TEST_NEGATIVE), + CRASHTYPE(REFCOUNT_SUB_AND_TEST_ZERO), CRASHTYPE(REFCOUNT_INC_ZERO), CRASHTYPE(REFCOUNT_ADD_ZERO), CRASHTYPE(REFCOUNT_INC_SATURATED), diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f9633a6f8571..96f5470a5f55 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -582,7 +582,6 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) } else { slave->dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs); } - ipsec->xs->xso.real_dev = NULL; } spin_unlock_bh(&bond->ipsec_lock); rcu_read_unlock(); @@ -599,34 +598,30 @@ static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) struct net_device *real_dev; struct slave *curr_active; struct bonding *bond; - int err; + bool ok = false; bond = netdev_priv(bond_dev); rcu_read_lock(); curr_active = rcu_dereference(bond->curr_active_slave); + if (!curr_active) + goto out; real_dev = curr_active->dev; - if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { - err = false; + if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) goto out; - } - if (!xs->xso.real_dev) { - err = false; + if (!xs->xso.real_dev) goto out; - } if (!real_dev->xfrmdev_ops || !real_dev->xfrmdev_ops->xdo_dev_offload_ok || - netif_is_bond_master(real_dev)) { - err = false; + netif_is_bond_master(real_dev)) goto out; - } - err = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); + ok = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); out: rcu_read_unlock(); - return err; + return ok; } static const struct xfrmdev_ops bond_xfrmdev_ops = { diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index bc80fb6397dc..95d59a18c022 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -936,7 +936,7 @@ static int bond_option_active_slave_set(struct bonding *bond, /* check to see if we are clearing active */ if (!slave_dev) { netdev_dbg(bond->dev, "Clearing current active slave\n"); - RCU_INIT_POINTER(bond->curr_active_slave, NULL); + bond_change_active_slave(bond, NULL); bond_select_active_slave(bond); } else { struct slave *old_active = rtnl_dereference(bond->curr_active_slave); diff --git a/drivers/net/dsa/microchip/ksz_ptp.c b/drivers/net/dsa/microchip/ksz_ptp.c index f0bd46e5d4ec..050f17c43ef6 100644 --- a/drivers/net/dsa/microchip/ksz_ptp.c +++ b/drivers/net/dsa/microchip/ksz_ptp.c @@ -266,7 +266,6 @@ static int ksz_ptp_enable_mode(struct ksz_device *dev) struct ksz_port *prt; struct dsa_port *dp; bool tag_en = false; - int ret; dsa_switch_for_each_user_port(dp, dev->ds) { prt = &dev->ports[dp->index]; @@ -277,9 +276,7 @@ static int ksz_ptp_enable_mode(struct ksz_device *dev) } if (tag_en) { - ret = ptp_schedule_worker(ptp_data->clock, 0); - if (ret) - return ret; + ptp_schedule_worker(ptp_data->clock, 0); } else { ptp_cancel_worker_sync(ptp_data->clock); } diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c index ce3b3690c3c0..c47f068f56b3 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_atu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c @@ -457,7 +457,8 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) trace_mv88e6xxx_atu_full_violation(chip->dev, spid, entry.portvec, entry.mac, fid); - chip->ports[spid].atu_full_violation++; + if (spid < ARRAY_SIZE(chip->ports)) + chip->ports[spid].atu_full_violation++; } return IRQ_HANDLED; diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 800711c2b6cb..3aa9c997018a 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -61,11 +61,46 @@ static int felix_cpu_port_for_conduit(struct dsa_switch *ds, return cpu_dp->index; } +/** + * felix_update_tag_8021q_rx_rule - Update VCAP ES0 tag_8021q rule after + * vlan_filtering change + * @outer_tagging_rule: Pointer to VCAP filter on which the update is performed + * @vlan_filtering: Current bridge VLAN filtering setting + * + * Source port identification for tag_8021q is done using VCAP ES0 rules on the + * CPU port(s). The ES0 tag B (inner tag from the packet) can be configured as + * either: + * - push_inner_tag=0: the inner tag is never pushed into the frame + * (and we lose info about the classified VLAN). This is + * good when the classified VLAN is a discardable quantity + * for the software RX path: it is either set to + * OCELOT_STANDALONE_PVID, or to + * ocelot_vlan_unaware_pvid(bridge). + * - push_inner_tag=1: the inner tag is always pushed. This is good when the + * classified VLAN is not a discardable quantity (the port + * is under a VLAN-aware bridge, and software needs to + * continue processing the packet in the same VLAN as the + * hardware). + * The point is that what is good for a VLAN-unaware port is not good for a + * VLAN-aware port, and vice versa. Thus, the RX tagging rules must be kept in + * sync with the VLAN filtering state of the port. + */ +static void +felix_update_tag_8021q_rx_rule(struct ocelot_vcap_filter *outer_tagging_rule, + bool vlan_filtering) +{ + if (vlan_filtering) + outer_tagging_rule->action.push_inner_tag = OCELOT_ES0_TAG; + else + outer_tagging_rule->action.push_inner_tag = OCELOT_NO_ES0_TAG; +} + /* Set up VCAP ES0 rules for pushing a tag_8021q VLAN towards the CPU such that * the tagger can perform RX source port identification. */ static int felix_tag_8021q_vlan_add_rx(struct dsa_switch *ds, int port, - int upstream, u16 vid) + int upstream, u16 vid, + bool vlan_filtering) { struct ocelot_vcap_filter *outer_tagging_rule; struct ocelot *ocelot = ds->priv; @@ -96,6 +131,14 @@ static int felix_tag_8021q_vlan_add_rx(struct dsa_switch *ds, int port, outer_tagging_rule->action.tag_a_tpid_sel = OCELOT_TAG_TPID_SEL_8021AD; outer_tagging_rule->action.tag_a_vid_sel = 1; outer_tagging_rule->action.vid_a_val = vid; + felix_update_tag_8021q_rx_rule(outer_tagging_rule, vlan_filtering); + outer_tagging_rule->action.tag_b_tpid_sel = OCELOT_TAG_TPID_SEL_8021Q; + /* Leave TAG_B_VID_SEL at 0 (Classified VID + VID_B_VAL). Since we also + * leave VID_B_VAL at 0, this makes ES0 tag B (the inner tag) equal to + * the classified VID, which we need to see in the DSA tagger's receive + * path. Note: the inner tag is only visible in the packet when pushed + * (push_inner_tag == OCELOT_ES0_TAG). + */ err = ocelot_vcap_filter_add(ocelot, outer_tagging_rule, NULL); if (err) @@ -227,6 +270,7 @@ static int felix_tag_8021q_vlan_del_tx(struct dsa_switch *ds, int port, u16 vid) static int felix_tag_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, u16 flags) { + struct dsa_port *dp = dsa_to_port(ds, port); struct dsa_port *cpu_dp; int err; @@ -234,11 +278,12 @@ static int felix_tag_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, * membership, which we aren't. So we don't need to add any VCAP filter * for the CPU port. */ - if (!dsa_is_user_port(ds, port)) + if (!dsa_port_is_user(dp)) return 0; dsa_switch_for_each_cpu_port(cpu_dp, ds) { - err = felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid); + err = felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid, + dsa_port_is_vlan_filtering(dp)); if (err) return err; } @@ -258,10 +303,11 @@ add_tx_failed: static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) { + struct dsa_port *dp = dsa_to_port(ds, port); struct dsa_port *cpu_dp; int err; - if (!dsa_is_user_port(ds, port)) + if (!dsa_port_is_user(dp)) return 0; dsa_switch_for_each_cpu_port(cpu_dp, ds) { @@ -278,11 +324,41 @@ static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) del_tx_failed: dsa_switch_for_each_cpu_port(cpu_dp, ds) - felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid); + felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid, + dsa_port_is_vlan_filtering(dp)); return err; } +static int felix_update_tag_8021q_rx_rules(struct dsa_switch *ds, int port, + bool vlan_filtering) +{ + struct ocelot_vcap_filter *outer_tagging_rule; + struct ocelot_vcap_block *block_vcap_es0; + struct ocelot *ocelot = ds->priv; + struct dsa_port *cpu_dp; + unsigned long cookie; + int err; + + block_vcap_es0 = &ocelot->block[VCAP_ES0]; + + dsa_switch_for_each_cpu_port(cpu_dp, ds) { + cookie = OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port, + cpu_dp->index); + + outer_tagging_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_es0, + cookie, false); + + felix_update_tag_8021q_rx_rule(outer_tagging_rule, vlan_filtering); + + err = ocelot_vcap_filter_replace(ocelot, outer_tagging_rule); + if (err) + return err; + } + + return 0; +} + static int felix_trap_get_cpu_port(struct dsa_switch *ds, const struct ocelot_vcap_filter *trap) { @@ -528,7 +604,19 @@ static int felix_tag_8021q_setup(struct dsa_switch *ds) * so we need to be careful that there are no extra frames to be * dequeued over MMIO, since we would never know to discard them. */ + ocelot_lock_xtr_grp_bh(ocelot, 0); ocelot_drain_cpu_queue(ocelot, 0); + ocelot_unlock_xtr_grp_bh(ocelot, 0); + + /* Problem: when using push_inner_tag=1 for ES0 tag B, we lose info + * about whether the received packets were VLAN-tagged on the wire, + * since they are always tagged on egress towards the CPU port. + * + * Since using push_inner_tag=1 is unavoidable for VLAN-aware bridges, + * we must work around the fallout by untagging in software to make + * untagged reception work more or less as expected. + */ + ds->untag_vlan_aware_bridge_pvid = true; return 0; } @@ -554,6 +642,8 @@ static void felix_tag_8021q_teardown(struct dsa_switch *ds) ocelot_port_teardown_dsa_8021q_cpu(ocelot, dp->index); dsa_tag_8021q_unregister(ds); + + ds->untag_vlan_aware_bridge_pvid = false; } static unsigned long felix_tag_8021q_get_host_fwd_mask(struct dsa_switch *ds) @@ -1008,8 +1098,23 @@ static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, struct netlink_ext_ack *extack) { struct ocelot *ocelot = ds->priv; + bool using_tag_8021q; + struct felix *felix; + int err; - return ocelot_port_vlan_filtering(ocelot, port, enabled, extack); + err = ocelot_port_vlan_filtering(ocelot, port, enabled, extack); + if (err) + return err; + + felix = ocelot_to_felix(ocelot); + using_tag_8021q = felix->tag_proto == DSA_TAG_PROTO_OCELOT_8021Q; + if (using_tag_8021q) { + err = felix_update_tag_8021q_rx_rules(ds, port, enabled); + if (err) + return err; + } + + return 0; } static int felix_vlan_add(struct dsa_switch *ds, int port, @@ -1515,6 +1620,8 @@ static void felix_port_deferred_xmit(struct kthread_work *work) int port = xmit_work->dp->index; int retries = 10; + ocelot_lock_inj_grp(ocelot, 0); + do { if (ocelot_can_inject(ocelot, 0)) break; @@ -1523,6 +1630,7 @@ static void felix_port_deferred_xmit(struct kthread_work *work) } while (--retries); if (!retries) { + ocelot_unlock_inj_grp(ocelot, 0); dev_err(ocelot->dev, "port %d failed to inject skb\n", port); ocelot_port_purge_txtstamp_skb(ocelot, port, skb); @@ -1532,6 +1640,8 @@ static void felix_port_deferred_xmit(struct kthread_work *work) ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb); + ocelot_unlock_inj_grp(ocelot, 0); + consume_skb(skb); kfree(xmit_work); } @@ -1691,6 +1801,8 @@ static bool felix_check_xtr_pkt(struct ocelot *ocelot) if (!felix->info->quirk_no_xtr_irq) return false; + ocelot_lock_xtr_grp(ocelot, grp); + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) { struct sk_buff *skb; unsigned int type; @@ -1727,6 +1839,8 @@ out: ocelot_drain_cpu_queue(ocelot, 0); } + ocelot_unlock_xtr_grp(ocelot, grp); + return true; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 75e2cfed5769..b26a7e2aa132 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5056,7 +5056,7 @@ void bnxt_del_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr) list_del_init(&fltr->list); } -void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all) +static void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all) { struct bnxt_filter_base *usr_fltr, *tmp; @@ -10271,7 +10271,7 @@ static void bnxt_hwrm_realloc_rss_ctx_vnic(struct bnxt *bp) } } -void bnxt_clear_rss_ctxs(struct bnxt *bp) +static void bnxt_clear_rss_ctxs(struct bnxt *bp) { struct ethtool_rxfh_context *ctx; unsigned long context; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 62e637c5be31..610c0fe468ad 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2795,7 +2795,6 @@ void bnxt_set_ring_params(struct bnxt *); int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode); void bnxt_insert_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr); void bnxt_del_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr); -void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all); int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size, bool async_only); int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp); @@ -2849,7 +2848,6 @@ int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); int __bnxt_setup_vnic_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, bool all); -void bnxt_clear_rss_ctxs(struct bnxt *bp); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); void bnxt_half_close_nic(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 39eed5831e3a..194f47316c77 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -968,9 +968,6 @@ static int bnxt_set_channels(struct net_device *dev, return -EINVAL; } - bnxt_clear_usr_fltrs(bp, true); - if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) - bnxt_clear_rss_ctxs(bp); if (netif_running(dev)) { if (BNXT_PF(bp)) { /* TODO CHIMP_FW: Send message to all VF's @@ -2000,7 +1997,6 @@ static int bnxt_set_rxfh(struct net_device *dev, bnxt_modify_rss(bp, NULL, NULL, rxfh); - bnxt_clear_usr_fltrs(bp, false); if (netif_running(bp->dev)) { bnxt_close_nic(bp, false, false); rc = bnxt_open_nic(bp, false, false); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 345681d5007e..f88b641533fc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -297,11 +297,6 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, * redirect is coming from a frame received by the * bnxt_en driver. */ - rx_buf = &rxr->rx_buf_ring[cons]; - mapping = rx_buf->mapping - bp->rx_dma_offset; - dma_unmap_page_attrs(&pdev->dev, mapping, - BNXT_RX_PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); /* if we are unable to allocate a new buffer, abort and reuse */ if (bnxt_alloc_rx_data(bp, rxr, rxr->rx_prod, GFP_ATOMIC)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 786ceae34488..dd9e68465e69 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1244,7 +1244,8 @@ static u64 hash_filter_ntuple(struct ch_filter_specification *fs, * in the Compressed Filter Tuple. */ if (tp->vlan_shift >= 0 && fs->mask.ivlan) - ntuple |= (FT_VLAN_VLD_F | fs->val.ivlan) << tp->vlan_shift; + ntuple |= (u64)(FT_VLAN_VLD_F | + fs->val.ivlan) << tp->vlan_shift; if (tp->port_shift >= 0 && fs->mask.iport) ntuple |= (u64)fs->val.iport << tp->port_shift; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index a71f848adc05..a293b08f36d4 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -2638,13 +2638,14 @@ static int dpaa2_switch_refill_bp(struct ethsw_core *ethsw) static int dpaa2_switch_seed_bp(struct ethsw_core *ethsw) { - int *count, i; + int *count, ret, i; for (i = 0; i < DPAA2_ETHSW_NUM_BUFS; i += BUFS_PER_CMD) { + ret = dpaa2_switch_add_bufs(ethsw, ethsw->bpid); count = ðsw->buf_count; - *count += dpaa2_switch_add_bufs(ethsw, ethsw->bpid); + *count += ret; - if (unlikely(*count < BUFS_PER_CMD)) + if (unlikely(ret < BUFS_PER_CMD)) return -ENOMEM; } diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c index 00fed5a61d62..62ef8e2fb5f1 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c @@ -337,7 +337,7 @@ int ice_devlink_create_pf_port(struct ice_pf *pf) return -EIO; attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = pf->hw.bus.func; + attrs.phys.port_number = pf->hw.pf_id; /* As FW supports only port split options for whole device, * set port split options only for first PF. @@ -455,7 +455,7 @@ int ice_devlink_create_vf_port(struct ice_vf *vf) return -EINVAL; attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; - attrs.pci_vf.pf = pf->hw.bus.func; + attrs.pci_vf.pf = pf->hw.pf_id; attrs.pci_vf.vf = vf->vf_id; ice_devlink_set_switch_id(pf, &attrs.switch_id); diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 1facf179a96f..f448d3a84564 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -513,6 +513,25 @@ static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring) } /** + * ice_get_frame_sz - calculate xdp_buff::frame_sz + * @rx_ring: the ring being configured + * + * Return frame size based on underlying PAGE_SIZE + */ +static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring) +{ + unsigned int frame_sz; + +#if (PAGE_SIZE >= 8192) + frame_sz = rx_ring->rx_buf_len; +#else + frame_sz = ice_rx_pg_size(rx_ring) / 2; +#endif + + return frame_sz; +} + +/** * ice_vsi_cfg_rxq - Configure an Rx queue * @ring: the ring being configured * @@ -576,7 +595,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) } } - xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq); + xdp_init_buff(&ring->xdp, ice_get_frame_sz(ring), &ring->xdp_rxq); ring->xdp.data = NULL; ring->xdp_ext.pkt_ctx = &ring->pkt_ctx; err = ice_setup_rx_ctx(ring); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 8d25b6981269..c9bc3f1add5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -522,30 +522,6 @@ err: } /** - * ice_rx_frame_truesize - * @rx_ring: ptr to Rx ring - * @size: size - * - * calculate the truesize with taking into the account PAGE_SIZE of - * underlying arch - */ -static unsigned int -ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size) -{ - unsigned int truesize; - -#if (PAGE_SIZE < 8192) - truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ -#else - truesize = rx_ring->rx_offset ? - SKB_DATA_ALIGN(rx_ring->rx_offset + size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : - SKB_DATA_ALIGN(size); -#endif - return truesize; -} - -/** * ice_run_xdp - Executes an XDP program on initialized xdp_buff * @rx_ring: Rx ring * @xdp: xdp_buff used as input to the XDP program @@ -837,16 +813,15 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) if (!dev_page_is_reusable(page)) return false; -#if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1)) return false; -#else +#if (PAGE_SIZE >= 8192) #define ICE_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048) + (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_3072) if (rx_buf->page_offset > ICE_LAST_OFFSET) return false; -#endif /* PAGE_SIZE < 8192) */ +#endif /* PAGE_SIZE >= 8192) */ /* If we have drained the page fragment pool we need to update * the pagecnt_bias and page count so that we fully restock the @@ -949,12 +924,7 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[ntc]; - rx_buf->pgcnt = -#if (PAGE_SIZE < 8192) - page_count(rx_buf->page); -#else - 0; -#endif + rx_buf->pgcnt = page_count(rx_buf->page); prefetchw(rx_buf->page); if (!size) @@ -1160,11 +1130,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) bool failure; u32 first; - /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ -#if (PAGE_SIZE < 8192) - xdp->frame_sz = ice_rx_frame_truesize(rx_ring, 0); -#endif - xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (xdp_prog) { xdp_ring = rx_ring->xdp_ring; @@ -1223,10 +1188,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) hard_start = page_address(rx_buf->page) + rx_buf->page_offset - offset; xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); -#if (PAGE_SIZE > 4096) - /* At larger PAGE_SIZE, frame_sz depend on len size */ - xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size); -#endif xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { break; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 11be39f435f3..33a42b4c21e0 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4808,6 +4808,7 @@ static void igb_set_rx_buffer_len(struct igb_adapter *adapter, #if (PAGE_SIZE < 8192) if (adapter->max_frame_size > IGB_MAX_FRAME_BUILD_SKB || + IGB_2K_TOO_SMALL_WITH_PADDING || rd32(E1000_RCTL) & E1000_RCTL_SBP) set_ring_uses_large_buffer(rx_ring); #endif diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 3e09d2285814..daf4b951e905 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -632,7 +632,9 @@ int rvu_mbox_handler_cpt_inline_ipsec_cfg(struct rvu *rvu, return ret; } -static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) +static bool validate_and_update_reg_offset(struct rvu *rvu, + struct cpt_rd_wr_reg_msg *req, + u64 *reg_offset) { u64 offset = req->reg_offset; int blkaddr, num_lfs, lf; @@ -663,6 +665,11 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) if (lf < 0) return false; + /* Translate local LF's offset to global CPT LF's offset to + * access LFX register. + */ + *reg_offset = (req->reg_offset & 0xFF000) + (lf << 3); + return true; } else if (!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK)) { /* Registers that can be accessed from PF */ @@ -697,7 +704,7 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, struct cpt_rd_wr_reg_msg *rsp) { u64 offset = req->reg_offset; - int blkaddr, lf; + int blkaddr; blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); if (blkaddr < 0) @@ -708,18 +715,10 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, !is_cpt_vf(rvu, req->hdr.pcifunc)) return CPT_AF_ERR_ACCESS_DENIED; - if (!is_valid_offset(rvu, req)) + if (!validate_and_update_reg_offset(rvu, req, &offset)) return CPT_AF_ERR_ACCESS_DENIED; - /* Translate local LF used by VFs to global CPT LF */ - lf = rvu_get_lf(rvu, &rvu->hw->block[blkaddr], req->hdr.pcifunc, - (offset & 0xFFF) >> 3); - - /* Translate local LF's offset to global CPT LF's offset */ - offset &= 0xFF000; - offset += lf << 3; - - rsp->reg_offset = offset; + rsp->reg_offset = req->reg_offset; rsp->ret_val = req->ret_val; rsp->is_write = req->is_write; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 17f6457a5193..da0a1c65ec4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -998,6 +998,7 @@ void mlx5e_build_ptys2ethtool_map(void); bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, enum mlx5e_mpwrq_umr_mode umr_mode); +void mlx5e_shampo_fill_umr(struct mlx5e_rq *rq, int len); void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq); void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5df904639b0c..16b67c457b60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1236,6 +1236,14 @@ void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq) rq->mpwqe.actual_wq_head = wq->head; rq->mpwqe.umr_in_progress = 0; rq->mpwqe.umr_completed = 0; + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u16 len; + + len = (shampo->pi - shampo->ci) & shampo->hd_per_wq; + mlx5e_shampo_fill_umr(rq, len); + } } void mlx5e_free_rx_descs(struct mlx5e_rq *rq) @@ -3020,15 +3028,18 @@ int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, struct mlx5e_params *params) { - struct mlx5_core_dev *mdev = priv->mdev; - int num_comp_vectors, ix, irq; - - num_comp_vectors = mlx5_comp_vectors_max(mdev); + int ix; for (ix = 0; ix < params->num_channels; ix++) { + int num_comp_vectors, irq, vec_ix; + struct mlx5_core_dev *mdev; + + mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix); + num_comp_vectors = mlx5_comp_vectors_max(mdev); cpumask_clear(priv->scratchpad.cpumask); + vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix); - for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { + for (irq = vec_ix; irq < num_comp_vectors; irq += params->num_channels) { int cpu = mlx5_comp_vector_get_cpu(mdev, irq); cpumask_set_cpu(cpu, priv->scratchpad.cpumask); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 225da8d691fc..de9d01036c28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -735,6 +735,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) ksm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); + ksm_entries = ALIGN_DOWN(ksm_entries, MLX5E_SHAMPO_WQ_HEADER_PER_PAGE); if (!ksm_entries) return 0; @@ -962,26 +963,31 @@ void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq) sq->cc = sqcc; } -static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr, - struct mlx5e_icosq *sq) +void mlx5e_shampo_fill_umr(struct mlx5e_rq *rq, int len) { - struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq); - struct mlx5e_shampo_hd *shampo; - /* assume 1:1 relationship between RQ and icosq */ - struct mlx5e_rq *rq = &c->rq; - int end, from, len = umr.len; + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + int end, from, full_len = len; - shampo = rq->mpwqe.shampo; end = shampo->hd_per_wq; from = shampo->ci; - if (from + len > shampo->hd_per_wq) { + if (from + len > end) { len -= end - from; bitmap_set(shampo->bitmap, from, end - from); from = 0; } bitmap_set(shampo->bitmap, from, len); - shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1); + shampo->ci = (shampo->ci + full_len) & (shampo->hd_per_wq - 1); +} + +static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr, + struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq); + /* assume 1:1 relationship between RQ and icosq */ + struct mlx5e_rq *rq = &c->rq; + + mlx5e_shampo_fill_umr(rq, umr.len); } int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c index 234cd00f71a1..b7d4b1a2baf2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c @@ -386,7 +386,8 @@ static int ipsec_fs_roce_tx_mpv_create(struct mlx5_core_dev *mdev, return -EOPNOTSUPP; peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); - if (!peer_priv) { + if (!peer_priv || !peer_priv->ipsec) { + mlx5_core_err(mdev, "IPsec not supported on master device\n"); err = -EOPNOTSUPP; goto release_peer; } @@ -455,7 +456,8 @@ static int ipsec_fs_roce_rx_mpv_create(struct mlx5_core_dev *mdev, return -EOPNOTSUPP; peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); - if (!peer_priv) { + if (!peer_priv || !peer_priv->ipsec) { + mlx5_core_err(mdev, "IPsec not supported on master device\n"); err = -EOPNOTSUPP; goto release_peer; } diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index ed2fb44500b0..3d72aa7b1305 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -453,9 +453,158 @@ static u16 ocelot_vlan_unaware_pvid(struct ocelot *ocelot, return VLAN_N_VID - bridge_num - 1; } +/** + * ocelot_update_vlan_reclassify_rule() - Make switch aware only to bridge VLAN TPID + * + * @ocelot: Switch private data structure + * @port: Index of ingress port + * + * IEEE 802.1Q-2018 clauses "5.5 C-VLAN component conformance" and "5.6 S-VLAN + * component conformance" suggest that a C-VLAN component should only recognize + * and filter on C-Tags, and an S-VLAN component should only recognize and + * process based on C-Tags. + * + * In Linux, as per commit 1a0b20b25732 ("Merge branch 'bridge-next'"), C-VLAN + * components are largely represented by a bridge with vlan_protocol 802.1Q, + * and S-VLAN components by a bridge with vlan_protocol 802.1ad. + * + * Currently the driver only offloads vlan_protocol 802.1Q, but the hardware + * design is non-conformant, because the switch assigns each frame to a VLAN + * based on an entirely different question, as detailed in figure "Basic VLAN + * Classification Flow" from its manual and reproduced below. + * + * Set TAG_TYPE, PCP, DEI, VID to port-default values in VLAN_CFG register + * if VLAN_AWARE_ENA[port] and frame has outer tag then: + * if VLAN_INNER_TAG_ENA[port] and frame has inner tag then: + * TAG_TYPE = (Frame.InnerTPID <> 0x8100) + * Set PCP, DEI, VID to values from inner VLAN header + * else: + * TAG_TYPE = (Frame.OuterTPID <> 0x8100) + * Set PCP, DEI, VID to values from outer VLAN header + * if VID == 0 then: + * VID = VLAN_CFG.VLAN_VID + * + * Summarized, the switch will recognize both 802.1Q and 802.1ad TPIDs as VLAN + * "with equal rights", and just set the TAG_TYPE bit to 0 (if 802.1Q) or to 1 + * (if 802.1ad). It will classify based on whichever of the tags is "outer", no + * matter what TPID that may have (or "inner", if VLAN_INNER_TAG_ENA[port]). + * + * In the VLAN Table, the TAG_TYPE information is not accessible - just the + * classified VID is - so it is as if each VLAN Table entry is for 2 VLANs: + * C-VLAN X, and S-VLAN X. + * + * Whereas the Linux bridge behavior is to only filter on frames with a TPID + * equal to the vlan_protocol, and treat everything else as VLAN-untagged. + * + * Consider an ingress packet tagged with 802.1ad VID=3 and 802.1Q VID=5, + * received on a bridge vlan_filtering=1 vlan_protocol=802.1Q port. This frame + * should be treated as 802.1Q-untagged, and classified to the PVID of that + * bridge port. Not to VID=3, and not to VID=5. + * + * The VCAP IS1 TCAM has everything we need to overwrite the choices made in + * the basic VLAN classification pipeline: it can match on TAG_TYPE in the key, + * and it can modify the classified VID in the action. Thus, for each port + * under a vlan_filtering bridge, we can insert a rule in VCAP IS1 lookup 0 to + * match on 802.1ad tagged frames and modify their classified VID to the 802.1Q + * PVID of the port. This effectively makes it appear to the outside world as + * if those packets were processed as VLAN-untagged. + * + * The rule needs to be updated each time the bridge PVID changes, and needs + * to be deleted if the bridge PVID is deleted, or if the port becomes + * VLAN-unaware. + */ +static int ocelot_update_vlan_reclassify_rule(struct ocelot *ocelot, int port) +{ + unsigned long cookie = OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port); + struct ocelot_vcap_block *block_vcap_is1 = &ocelot->block[VCAP_IS1]; + struct ocelot_port *ocelot_port = ocelot->ports[port]; + const struct ocelot_bridge_vlan *pvid_vlan; + struct ocelot_vcap_filter *filter; + int err, val, pcp, dei; + bool vid_replace_ena; + u16 vid; + + pvid_vlan = ocelot_port->pvid_vlan; + vid_replace_ena = ocelot_port->vlan_aware && pvid_vlan; + + filter = ocelot_vcap_block_find_filter_by_id(block_vcap_is1, cookie, + false); + if (!vid_replace_ena) { + /* If the reclassification filter doesn't need to exist, delete + * it if it was previously installed, and exit doing nothing + * otherwise. + */ + if (filter) + return ocelot_vcap_filter_del(ocelot, filter); + + return 0; + } + + /* The reclassification rule must apply. See if it already exists + * or if it must be created. + */ + + /* Treating as VLAN-untagged means using as classified VID equal to + * the bridge PVID, and PCP/DEI set to the port default QoS values. + */ + vid = pvid_vlan->vid; + val = ocelot_read_gix(ocelot, ANA_PORT_QOS_CFG, port); + pcp = ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_X(val); + dei = !!(val & ANA_PORT_QOS_CFG_DP_DEFAULT_VAL); + + if (filter) { + bool changed = false; + + /* Filter exists, just update it */ + if (filter->action.vid != vid) { + filter->action.vid = vid; + changed = true; + } + if (filter->action.pcp != pcp) { + filter->action.pcp = pcp; + changed = true; + } + if (filter->action.dei != dei) { + filter->action.dei = dei; + changed = true; + } + + if (!changed) + return 0; + + return ocelot_vcap_filter_replace(ocelot, filter); + } + + /* Filter doesn't exist, create it */ + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + filter->key_type = OCELOT_VCAP_KEY_ANY; + filter->ingress_port_mask = BIT(port); + filter->vlan.tpid = OCELOT_VCAP_BIT_1; + filter->prio = 1; + filter->id.cookie = cookie; + filter->id.tc_offload = false; + filter->block_id = VCAP_IS1; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + filter->lookup = 0; + filter->action.vid_replace_ena = true; + filter->action.pcp_dei_ena = true; + filter->action.vid = vid; + filter->action.pcp = pcp; + filter->action.dei = dei; + + err = ocelot_vcap_filter_add(ocelot, filter, NULL); + if (err) + kfree(filter); + + return err; +} + /* Default vlan to clasify for untagged frames (may be zero) */ -static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, - const struct ocelot_bridge_vlan *pvid_vlan) +static int ocelot_port_set_pvid(struct ocelot *ocelot, int port, + const struct ocelot_bridge_vlan *pvid_vlan) { struct ocelot_port *ocelot_port = ocelot->ports[port]; u16 pvid = ocelot_vlan_unaware_pvid(ocelot, ocelot_port->bridge); @@ -475,15 +624,23 @@ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, * happens automatically), but also 802.1p traffic which gets * classified to VLAN 0, but that is always in our RX filter, so it * would get accepted were it not for this setting. + * + * Also, we only support the bridge 802.1Q VLAN protocol, so + * 802.1ad-tagged frames (carrying S-Tags) should be considered + * 802.1Q-untagged, and also dropped. */ if (!pvid_vlan && ocelot_port->vlan_aware) val = ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA; + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA; ocelot_rmw_gix(ocelot, val, ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA, + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA, ANA_PORT_DROP_CFG, port); + + return ocelot_update_vlan_reclassify_rule(ocelot, port); } static struct ocelot_bridge_vlan *ocelot_bridge_vlan_find(struct ocelot *ocelot, @@ -631,7 +788,10 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M, ANA_PORT_VLAN_CFG, port); - ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + err = ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + if (err) + return err; + ocelot_port_manage_port_tag(ocelot, port); return 0; @@ -684,9 +844,12 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, return err; /* Default ingress vlan classification */ - if (pvid) - ocelot_port_set_pvid(ocelot, port, - ocelot_bridge_vlan_find(ocelot, vid)); + if (pvid) { + err = ocelot_port_set_pvid(ocelot, port, + ocelot_bridge_vlan_find(ocelot, vid)); + if (err) + return err; + } /* Untagged egress vlan clasification */ ocelot_port_manage_port_tag(ocelot, port); @@ -712,8 +875,11 @@ int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid) return err; /* Ingress */ - if (del_pvid) - ocelot_port_set_pvid(ocelot, port, NULL); + if (del_pvid) { + err = ocelot_port_set_pvid(ocelot, port, NULL); + if (err) + return err; + } /* Egress */ ocelot_port_manage_port_tag(ocelot, port); @@ -1099,6 +1265,48 @@ void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb, } EXPORT_SYMBOL(ocelot_ptp_rx_timestamp); +void ocelot_lock_inj_grp(struct ocelot *ocelot, int grp) + __acquires(&ocelot->inj_lock) +{ + spin_lock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_inj_grp); + +void ocelot_unlock_inj_grp(struct ocelot *ocelot, int grp) + __releases(&ocelot->inj_lock) +{ + spin_unlock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_inj_grp); + +void ocelot_lock_xtr_grp(struct ocelot *ocelot, int grp) + __acquires(&ocelot->inj_lock) +{ + spin_lock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_xtr_grp); + +void ocelot_unlock_xtr_grp(struct ocelot *ocelot, int grp) + __releases(&ocelot->inj_lock) +{ + spin_unlock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_xtr_grp); + +void ocelot_lock_xtr_grp_bh(struct ocelot *ocelot, int grp) + __acquires(&ocelot->xtr_lock) +{ + spin_lock_bh(&ocelot->xtr_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_xtr_grp_bh); + +void ocelot_unlock_xtr_grp_bh(struct ocelot *ocelot, int grp) + __releases(&ocelot->xtr_lock) +{ + spin_unlock_bh(&ocelot->xtr_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_xtr_grp_bh); + int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) { u64 timestamp, src_port, len; @@ -1109,6 +1317,8 @@ int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) u32 val, *buf; int err; + lockdep_assert_held(&ocelot->xtr_lock); + err = ocelot_xtr_poll_xfh(ocelot, grp, xfh); if (err) return err; @@ -1184,6 +1394,8 @@ bool ocelot_can_inject(struct ocelot *ocelot, int grp) { u32 val = ocelot_read(ocelot, QS_INJ_STATUS); + lockdep_assert_held(&ocelot->inj_lock); + if (!(val & QS_INJ_STATUS_FIFO_RDY(BIT(grp)))) return false; if (val & QS_INJ_STATUS_WMARK_REACHED(BIT(grp))) @@ -1193,28 +1405,55 @@ bool ocelot_can_inject(struct ocelot *ocelot, int grp) } EXPORT_SYMBOL(ocelot_can_inject); -void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag) +/** + * ocelot_ifh_set_basic - Set basic information in Injection Frame Header + * @ifh: Pointer to Injection Frame Header memory + * @ocelot: Switch private data structure + * @port: Egress port number + * @rew_op: Egress rewriter operation for PTP + * @skb: Pointer to socket buffer (packet) + * + * Populate the Injection Frame Header with basic information for this skb: the + * analyzer bypass bit, destination port, VLAN info, egress rewriter info. + */ +void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, + u32 rew_op, struct sk_buff *skb) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct net_device *dev = skb->dev; + u64 vlan_tci, tag_type; + int qos_class; + + ocelot_xmit_get_vlan_info(skb, ocelot_port->bridge, &vlan_tci, + &tag_type); + + qos_class = netdev_get_num_tc(dev) ? + netdev_get_prio_tc_map(dev, skb->priority) : skb->priority; + + memset(ifh, 0, OCELOT_TAG_LEN); ocelot_ifh_set_bypass(ifh, 1); + ocelot_ifh_set_src(ifh, BIT_ULL(ocelot->num_phys_ports)); ocelot_ifh_set_dest(ifh, BIT_ULL(port)); - ocelot_ifh_set_tag_type(ifh, IFH_TAG_TYPE_C); - if (vlan_tag) - ocelot_ifh_set_vlan_tci(ifh, vlan_tag); + ocelot_ifh_set_qos_class(ifh, qos_class); + ocelot_ifh_set_tag_type(ifh, tag_type); + ocelot_ifh_set_vlan_tci(ifh, vlan_tci); if (rew_op) ocelot_ifh_set_rew_op(ifh, rew_op); } -EXPORT_SYMBOL(ocelot_ifh_port_set); +EXPORT_SYMBOL(ocelot_ifh_set_basic); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb) { - u32 ifh[OCELOT_TAG_LEN / 4] = {0}; + u32 ifh[OCELOT_TAG_LEN / 4]; unsigned int i, count, last; + lockdep_assert_held(&ocelot->inj_lock); + ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) | QS_INJ_CTRL_SOF, QS_INJ_CTRL, grp); - ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); + ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); for (i = 0; i < OCELOT_TAG_LEN / 4; i++) ocelot_write_rix(ocelot, ifh[i], QS_INJ_WR, grp); @@ -1247,6 +1486,8 @@ EXPORT_SYMBOL(ocelot_port_inject_frame); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp) { + lockdep_assert_held(&ocelot->xtr_lock); + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) ocelot_read_rix(ocelot, QS_XTR_RD, grp); } @@ -2532,7 +2773,7 @@ int ocelot_port_set_default_prio(struct ocelot *ocelot, int port, u8 prio) ANA_PORT_QOS_CFG, port); - return 0; + return ocelot_update_vlan_reclassify_rule(ocelot, port); } EXPORT_SYMBOL_GPL(ocelot_port_set_default_prio); @@ -2929,6 +3170,8 @@ int ocelot_init(struct ocelot *ocelot) mutex_init(&ocelot->fwd_domain_lock); spin_lock_init(&ocelot->ptp_clock_lock); spin_lock_init(&ocelot->ts_id_lock); + spin_lock_init(&ocelot->inj_lock); + spin_lock_init(&ocelot->xtr_lock); ocelot->owq = alloc_ordered_workqueue("ocelot-owq", 0); if (!ocelot->owq) diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c index 312a46832154..00326ae8c708 100644 --- a/drivers/net/ethernet/mscc/ocelot_fdma.c +++ b/drivers/net/ethernet/mscc/ocelot_fdma.c @@ -665,8 +665,7 @@ static int ocelot_fdma_prepare_skb(struct ocelot *ocelot, int port, u32 rew_op, ifh = skb_push(skb, OCELOT_TAG_LEN); skb_put(skb, ETH_FCS_LEN); - memset(ifh, 0, OCELOT_TAG_LEN); - ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); + ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); return 0; } diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 73cdec5ca6a3..5734b86aed5b 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -695,6 +695,7 @@ static void is1_entry_set(struct ocelot *ocelot, int ix, vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_MC, filter->dmac_mc); vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_BC, filter->dmac_bc); vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_VLAN_TAGGED, tag->tagged); + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_TPID, tag->tpid); vcap_key_set(vcap, &data, VCAP_IS1_HK_VID, tag->vid.value, tag->vid.mask); vcap_key_set(vcap, &data, VCAP_IS1_HK_PCP, diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 993212c3a7da..c09dd2e3343c 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -51,6 +51,8 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg) struct ocelot *ocelot = arg; int grp = 0, err; + ocelot_lock_xtr_grp(ocelot, grp); + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) { struct sk_buff *skb; @@ -69,6 +71,8 @@ out: if (err < 0) ocelot_drain_cpu_queue(ocelot, 0); + ocelot_unlock_xtr_grp(ocelot, grp); + return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c index ec54b18c5fe7..a5e9b779c44d 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c @@ -124,8 +124,12 @@ static int ngbe_phylink_init(struct wx *wx) MAC_SYM_PAUSE | MAC_ASYM_PAUSE; config->mac_managed_pm = true; - phy_mode = PHY_INTERFACE_MODE_RGMII_ID; - __set_bit(PHY_INTERFACE_MODE_RGMII_ID, config->supported_interfaces); + /* The MAC only has add the Tx delay and it can not be modified. + * So just disable TX delay in PHY, and it is does not matter to + * internal phy. + */ + phy_mode = PHY_INTERFACE_MODE_RGMII_RXID; + __set_bit(PHY_INTERFACE_MODE_RGMII_RXID, config->supported_interfaces); phylink = phylink_create(config, NULL, phy_mode, &ngbe_mac_ops); if (IS_ERR(phylink)) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 8165f5f271ff..5a7d6b872f5d 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -172,6 +172,7 @@ #define XAE_UAW0_OFFSET 0x00000700 /* Unicast address word 0 */ #define XAE_UAW1_OFFSET 0x00000704 /* Unicast address word 1 */ #define XAE_FMI_OFFSET 0x00000708 /* Frame Filter Control */ +#define XAE_FFE_OFFSET 0x0000070C /* Frame Filter Enable */ #define XAE_AF0_OFFSET 0x00000710 /* Address Filter 0 */ #define XAE_AF1_OFFSET 0x00000714 /* Address Filter 1 */ diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 38f7b764fe66..88ff82c52d0c 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -433,7 +433,7 @@ static int netdev_set_mac_address(struct net_device *ndev, void *p) */ static void axienet_set_multicast_list(struct net_device *ndev) { - int i; + int i = 0; u32 reg, af0reg, af1reg; struct axienet_local *lp = netdev_priv(ndev); @@ -451,7 +451,10 @@ static void axienet_set_multicast_list(struct net_device *ndev) } else if (!netdev_mc_empty(ndev)) { struct netdev_hw_addr *ha; - i = 0; + reg = axienet_ior(lp, XAE_FMI_OFFSET); + reg &= ~XAE_FMI_PM_MASK; + axienet_iow(lp, XAE_FMI_OFFSET, reg); + netdev_for_each_mc_addr(ha, ndev) { if (i >= XAE_MULTICAST_CAM_TABLE_NUM) break; @@ -470,6 +473,7 @@ static void axienet_set_multicast_list(struct net_device *ndev) axienet_iow(lp, XAE_FMI_OFFSET, reg); axienet_iow(lp, XAE_AF0_OFFSET, af0reg); axienet_iow(lp, XAE_AF1_OFFSET, af1reg); + axienet_iow(lp, XAE_FFE_OFFSET, 1); i++; } } else { @@ -477,18 +481,15 @@ static void axienet_set_multicast_list(struct net_device *ndev) reg &= ~XAE_FMI_PM_MASK; axienet_iow(lp, XAE_FMI_OFFSET, reg); - - for (i = 0; i < XAE_MULTICAST_CAM_TABLE_NUM; i++) { - reg = axienet_ior(lp, XAE_FMI_OFFSET) & 0xFFFFFF00; - reg |= i; - - axienet_iow(lp, XAE_FMI_OFFSET, reg); - axienet_iow(lp, XAE_AF0_OFFSET, 0); - axienet_iow(lp, XAE_AF1_OFFSET, 0); - } - dev_info(&ndev->dev, "Promiscuous mode disabled.\n"); } + + for (; i < XAE_MULTICAST_CAM_TABLE_NUM; i++) { + reg = axienet_ior(lp, XAE_FMI_OFFSET) & 0xFFFFFF00; + reg |= i; + axienet_iow(lp, XAE_FMI_OFFSET, reg); + axienet_iow(lp, XAE_FFE_OFFSET, 0); + } } /** diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 87865918dab6..25e5bfbb6f89 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -555,7 +555,7 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, unsigned long rules) { const u16 mask = RTL8211F_LEDCR_MASK << (RTL8211F_LEDCR_SHIFT * index); - u16 reg = RTL8211F_LEDCR_MODE; /* Mode B */ + u16 reg = 0; if (index >= RTL8211F_LED_COUNT) return -EINVAL; @@ -575,6 +575,7 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, } reg <<= RTL8211F_LEDCR_SHIFT * index; + reg |= RTL8211F_LEDCR_MODE; /* Mode B */ return phy_modify_paged(phydev, 0xd04, RTL8211F_LEDCR, mask, reg); } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 6c79fc43fa31..61ffa8851d7b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2867,8 +2867,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) if (err < 0) goto err_xdp_reg_mem_model; - virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index)); + virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); return 0; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 1ae8b2351654..210fb77f51ba 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -498,7 +498,7 @@ static int pmem_attach_disk(struct device *dev, } if (fua) lim.features |= BLK_FEAT_FUA; - if (is_nd_pfn(dev)) + if (is_nd_pfn(dev) || pmem_should_map_pages(dev)) lim.features |= BLK_FEAT_DAX; if (!devm_request_mem_region(dev, res->start, resource_size(res), diff --git a/drivers/of/irq.c b/drivers/of/irq.c index c94203ce65bb..8fd63100ba8f 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -344,7 +344,8 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar struct device_node *p; const __be32 *addr; u32 intsize; - int i, res; + int i, res, addr_len; + __be32 addr_buf[3] = { 0 }; pr_debug("of_irq_parse_one: dev=%pOF, index=%d\n", device, index); @@ -353,13 +354,19 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar return of_irq_parse_oldworld(device, index, out_irq); /* Get the reg property (if any) */ - addr = of_get_property(device, "reg", NULL); + addr = of_get_property(device, "reg", &addr_len); + + /* Prevent out-of-bounds read in case of longer interrupt parent address size */ + if (addr_len > (3 * sizeof(__be32))) + addr_len = 3 * sizeof(__be32); + if (addr) + memcpy(addr_buf, addr, addr_len); /* Try the new-style interrupts-extended first */ res = of_parse_phandle_with_args(device, "interrupts-extended", "#interrupt-cells", index, out_irq); if (!res) - return of_irq_parse_raw(addr, out_irq); + return of_irq_parse_raw(addr_buf, out_irq); /* Look for the interrupt parent. */ p = of_irq_find_parent(device); @@ -389,7 +396,7 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar /* Check if there are any interrupt-map translations to process */ - res = of_irq_parse_raw(addr, out_irq); + res = of_irq_parse_raw(addr_buf, out_irq); out: of_node_put(p); return res; diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c index 7fc602e01487..7e89f547999b 100644 --- a/drivers/platform/surface/aggregator/controller.c +++ b/drivers/platform/surface/aggregator/controller.c @@ -1354,7 +1354,8 @@ void ssam_controller_destroy(struct ssam_controller *ctrl) if (ctrl->state == SSAM_CONTROLLER_UNINITIALIZED) return; - WARN_ON(ctrl->state != SSAM_CONTROLLER_STOPPED); + WARN_ON(ctrl->state != SSAM_CONTROLLER_STOPPED && + ctrl->state != SSAM_CONTROLLER_INITIALIZED); /* * Note: New events could still have been received after the previous diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 1c4d74db08c9..a23dff35f8ca 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -265,16 +265,34 @@ static const struct software_node *ssam_node_group_sl5[] = { &ssam_node_root, &ssam_node_bat_ac, &ssam_node_bat_main, - &ssam_node_tmp_perf_profile, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, + &ssam_node_hid_main_keyboard, + &ssam_node_hid_main_touchpad, + &ssam_node_hid_main_iid5, + &ssam_node_hid_sam_ucm_ucsi, + NULL, +}; + +/* Devices for Surface Laptop 6. */ +static const struct software_node *ssam_node_group_sl6[] = { + &ssam_node_root, + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, &ssam_node_hid_main_keyboard, &ssam_node_hid_main_touchpad, &ssam_node_hid_main_iid5, + &ssam_node_hid_sam_sensors, &ssam_node_hid_sam_ucm_ucsi, NULL, }; -/* Devices for Surface Laptop Studio. */ -static const struct software_node *ssam_node_group_sls[] = { +/* Devices for Surface Laptop Studio 1. */ +static const struct software_node *ssam_node_group_sls1[] = { &ssam_node_root, &ssam_node_bat_ac, &ssam_node_bat_main, @@ -289,6 +307,22 @@ static const struct software_node *ssam_node_group_sls[] = { NULL, }; +/* Devices for Surface Laptop Studio 2. */ +static const struct software_node *ssam_node_group_sls2[] = { + &ssam_node_root, + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, + &ssam_node_pos_tablet_switch, + &ssam_node_hid_sam_keyboard, + &ssam_node_hid_sam_penstash, + &ssam_node_hid_sam_sensors, + &ssam_node_hid_sam_ucm_ucsi, + NULL, +}; + /* Devices for Surface Laptop Go. */ static const struct software_node *ssam_node_group_slg1[] = { &ssam_node_root, @@ -324,7 +358,7 @@ static const struct software_node *ssam_node_group_sp8[] = { NULL, }; -/* Devices for Surface Pro 9 */ +/* Devices for Surface Pro 9 and 10 */ static const struct software_node *ssam_node_group_sp9[] = { &ssam_node_root, &ssam_node_hub_kip, @@ -365,6 +399,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Pro 9 */ { "MSHW0343", (unsigned long)ssam_node_group_sp9 }, + /* Surface Pro 10 */ + { "MSHW0510", (unsigned long)ssam_node_group_sp9 }, + /* Surface Book 2 */ { "MSHW0107", (unsigned long)ssam_node_group_gen5 }, @@ -389,14 +426,23 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Laptop 5 */ { "MSHW0350", (unsigned long)ssam_node_group_sl5 }, + /* Surface Laptop 6 */ + { "MSHW0530", (unsigned long)ssam_node_group_sl6 }, + /* Surface Laptop Go 1 */ { "MSHW0118", (unsigned long)ssam_node_group_slg1 }, /* Surface Laptop Go 2 */ { "MSHW0290", (unsigned long)ssam_node_group_slg1 }, - /* Surface Laptop Studio */ - { "MSHW0123", (unsigned long)ssam_node_group_sls }, + /* Surface Laptop Go 3 */ + { "MSHW0440", (unsigned long)ssam_node_group_slg1 }, + + /* Surface Laptop Studio 1 */ + { "MSHW0123", (unsigned long)ssam_node_group_sls1 }, + + /* Surface Laptop Studio 2 */ + { "MSHW0360", (unsigned long)ssam_node_group_sls2 }, { }, }; diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index cc735931f97b..37636e5a38e3 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -146,6 +146,20 @@ static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; static int throttle_thermal_policy_write(struct asus_wmi *); +static const struct dmi_system_id asus_ally_mcu_quirk[] = { + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC71L"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC72L"), + }, + }, + { }, +}; + static bool ashs_present(void) { int i = 0; @@ -4685,7 +4699,7 @@ static int asus_wmi_add(struct platform_device *pdev) asus->dgpu_disable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_DGPU); asus->kbd_rgb_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_STATE); asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) - && dmi_match(DMI_BOARD_NAME, "RC71L"); + && dmi_check_system(asus_ally_mcu_quirk); if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE)) asus->mini_led_dev_id = ASUS_WMI_DEVID_MINI_LED_MODE; diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c index 7fa360073f6e..404582307109 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c @@ -1549,8 +1549,7 @@ int tpmi_sst_dev_add(struct auxiliary_device *auxdev) goto unlock_free; } - ret = sst_main(auxdev, &pd_info[i]); - if (ret) { + if (sst_main(auxdev, &pd_info[i])) { /* * This entry is not valid, hardware can partially * populate dies. In this case MMIO will have 0xFFs. diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 0a97cfedd706..42a4a996defb 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1601,9 +1601,15 @@ static int dasd_ese_needs_format(struct dasd_block *block, struct irb *irb) if (!sense) return 0; - return !!(sense[1] & SNS1_NO_REC_FOUND) || - !!(sense[1] & SNS1_FILE_PROTECTED) || - scsw_cstat(&irb->scsw) == SCHN_STAT_INCORR_LEN; + if (sense[1] & SNS1_NO_REC_FOUND) + return 1; + + if ((sense[1] & SNS1_INV_TRACK_FORMAT) && + scsw_is_tm(&irb->scsw) && + !(sense[2] & SNS2_ENV_DATA_PRESENT)) + return 1; + + return 0; } static int dasd_ese_oos_cond(u8 *sense) @@ -1624,7 +1630,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, struct dasd_device *device; unsigned long now; int nrf_suppressed = 0; - int fp_suppressed = 0; + int it_suppressed = 0; struct request *req; u8 *sense = NULL; int expires; @@ -1679,8 +1685,9 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, */ sense = dasd_get_sense(irb); if (sense) { - fp_suppressed = (sense[1] & SNS1_FILE_PROTECTED) && - test_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); + it_suppressed = (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); nrf_suppressed = (sense[1] & SNS1_NO_REC_FOUND) && test_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); @@ -1695,7 +1702,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, return; } } - if (!(fp_suppressed || nrf_suppressed)) + if (!(it_suppressed || nrf_suppressed)) device->discipline->dump_sense_dbf(device, irb, "int"); if (device->features & DASD_FEATURE_ERPLOG) @@ -2459,14 +2466,17 @@ retry: rc = 0; list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) { /* - * In some cases the 'File Protected' or 'Incorrect Length' - * error might be expected and error recovery would be - * unnecessary in these cases. Check if the according suppress - * bit is set. + * In some cases certain errors might be expected and + * error recovery would be unnecessary in these cases. + * Check if the according suppress bit is set. */ sense = dasd_get_sense(&cqr->irb); - if (sense && sense[1] & SNS1_FILE_PROTECTED && - test_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags)) + if (sense && (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags)) + continue; + if (sense && (sense[1] & SNS1_NO_REC_FOUND) && + test_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags)) continue; if (scsw_cstat(&cqr->irb.scsw) == 0x40 && test_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags)) diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index bbbacfc386f2..d0aa267462c5 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -1386,14 +1386,8 @@ dasd_3990_erp_file_prot(struct dasd_ccw_req * erp) struct dasd_device *device = erp->startdev; - /* - * In some cases the 'File Protected' error might be expected and - * log messages shouldn't be written then. - * Check if the according suppress bit is set. - */ - if (!test_bit(DASD_CQR_SUPPRESS_FP, &erp->flags)) - dev_err(&device->cdev->dev, - "Accessing the DASD failed because of a hardware error\n"); + dev_err(&device->cdev->dev, + "Accessing the DASD failed because of a hardware error\n"); return dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 9388b5c383ca..90b106408992 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2275,6 +2275,7 @@ dasd_eckd_analysis_ccw(struct dasd_device *device) cqr->status = DASD_CQR_FILLED; /* Set flags to suppress output for expected errors */ set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); + set_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); return cqr; } @@ -2556,7 +2557,6 @@ dasd_eckd_build_check_tcw(struct dasd_device *base, struct format_data_t *fdata, cqr->buildclk = get_tod_clock(); cqr->status = DASD_CQR_FILLED; /* Set flags to suppress output for expected errors */ - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); return cqr; @@ -4130,8 +4130,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( /* Set flags to suppress output for expected errors */ if (dasd_eckd_is_ese(basedev)) { - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); - set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); } @@ -4633,9 +4631,8 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( /* Set flags to suppress output for expected errors */ if (dasd_eckd_is_ese(basedev)) { - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); - set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); + set_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); } return cqr; @@ -5780,36 +5777,32 @@ static void dasd_eckd_dump_sense(struct dasd_device *device, { u8 *sense = dasd_get_sense(irb); - if (scsw_is_tm(&irb->scsw)) { - /* - * In some cases the 'File Protected' or 'Incorrect Length' - * error might be expected and log messages shouldn't be written - * then. Check if the according suppress bit is set. - */ - if (sense && (sense[1] & SNS1_FILE_PROTECTED) && - test_bit(DASD_CQR_SUPPRESS_FP, &req->flags)) - return; - if (scsw_cstat(&irb->scsw) == 0x40 && - test_bit(DASD_CQR_SUPPRESS_IL, &req->flags)) - return; + /* + * In some cases certain errors might be expected and + * log messages shouldn't be written then. + * Check if the according suppress bit is set. + */ + if (sense && (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &req->flags)) + return; - dasd_eckd_dump_sense_tcw(device, req, irb); - } else { - /* - * In some cases the 'Command Reject' or 'No Record Found' - * error might be expected and log messages shouldn't be - * written then. Check if the according suppress bit is set. - */ - if (sense && sense[0] & SNS0_CMD_REJECT && - test_bit(DASD_CQR_SUPPRESS_CR, &req->flags)) - return; + if (sense && sense[0] & SNS0_CMD_REJECT && + test_bit(DASD_CQR_SUPPRESS_CR, &req->flags)) + return; - if (sense && sense[1] & SNS1_NO_REC_FOUND && - test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags)) - return; + if (sense && sense[1] & SNS1_NO_REC_FOUND && + test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags)) + return; + if (scsw_cstat(&irb->scsw) == 0x40 && + test_bit(DASD_CQR_SUPPRESS_IL, &req->flags)) + return; + + if (scsw_is_tm(&irb->scsw)) + dasd_eckd_dump_sense_tcw(device, req, irb); + else dasd_eckd_dump_sense_ccw(device, req, irb); - } } static int dasd_eckd_reload_device(struct dasd_device *device) diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 1aa426b1dedd..6da47a65af61 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -41,7 +41,6 @@ int dasd_gendisk_alloc(struct dasd_block *block) */ .max_segment_size = PAGE_SIZE, .seg_boundary_mask = PAGE_SIZE - 1, - .dma_alignment = PAGE_SIZE - 1, .max_segments = USHRT_MAX, }; struct gendisk *gdp; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index e5f40536b425..81cfb5c89681 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -196,7 +196,7 @@ struct dasd_ccw_req { * The following flags are used to suppress output of certain errors. */ #define DASD_CQR_SUPPRESS_NRF 4 /* Suppress 'No Record Found' error */ -#define DASD_CQR_SUPPRESS_FP 5 /* Suppress 'File Protected' error*/ +#define DASD_CQR_SUPPRESS_IT 5 /* Suppress 'Invalid Track' error*/ #define DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */ #define DASD_CQR_SUPPRESS_CR 7 /* Suppress 'Command Reject' error */ diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 8b0eded6ef36..01f035f9330e 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -100,7 +100,8 @@ retry_trace: dprint_init(mrioc, "trying to allocate trace diag buffer of size = %dKB\n", trace_size / 1024); - if (mpi3mr_alloc_trace_buffer(mrioc, trace_size)) { + if (get_order(trace_size) > MAX_PAGE_ORDER || + mpi3mr_alloc_trace_buffer(mrioc, trace_size)) { retry = true; trace_size -= trace_dec_size; dprint_init(mrioc, "trace diag buffer allocation failed\n" @@ -118,8 +119,12 @@ retry_fw: diag_buffer->type = MPI3_DIAG_BUFFER_TYPE_FW; diag_buffer->status = MPI3MR_HDB_BUFSTATUS_NOT_ALLOCATED; if ((mrioc->facts.diag_fw_sz < fw_size) && (fw_size >= fw_min_size)) { - diag_buffer->addr = dma_alloc_coherent(&mrioc->pdev->dev, - fw_size, &diag_buffer->dma_addr, GFP_KERNEL); + if (get_order(fw_size) <= MAX_PAGE_ORDER) { + diag_buffer->addr + = dma_alloc_coherent(&mrioc->pdev->dev, fw_size, + &diag_buffer->dma_addr, + GFP_KERNEL); + } if (!retry) dprint_init(mrioc, "%s:trying to allocate firmware diag buffer of size = %dKB\n", diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index ca8f132e03ae..616894571c6a 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -5234,6 +5234,7 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id) spin_lock_init(&mrioc->watchdog_lock); spin_lock_init(&mrioc->chain_buf_lock); spin_lock_init(&mrioc->sas_node_lock); + spin_lock_init(&mrioc->trigger_lock); INIT_LIST_HEAD(&mrioc->fwevt_list); INIT_LIST_HEAD(&mrioc->tgtdev_list); diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 7e9074519ad2..4dc8aba33d9b 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -2546,11 +2546,6 @@ release_lock: } EXPORT_SYMBOL(qman_delete_cgr); -struct cgr_comp { - struct qman_cgr *cgr; - struct completion completion; -}; - static void qman_delete_cgr_smp_call(void *p) { qman_delete_cgr((struct qman_cgr *)p); diff --git a/drivers/staging/media/atomisp/include/linux/atomisp.h b/drivers/staging/media/atomisp/include/linux/atomisp.h index 16c9da172c03..fefbe3cd08f3 100644 --- a/drivers/staging/media/atomisp/include/linux/atomisp.h +++ b/drivers/staging/media/atomisp/include/linux/atomisp.h @@ -20,7 +20,6 @@ #define _ATOM_ISP_H #include <linux/types.h> -#include <linux/version.h> /* struct media_device_info.hw_revision */ #define ATOMISP_HW_REVISION_MASK 0x0000ff00 diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index 4a2e869b9538..daed67d19efb 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -13,6 +13,28 @@ #include "thermal_core.h" +static void bang_bang_set_instance_target(struct thermal_instance *instance, + unsigned int target) +{ + if (instance->target != 0 && instance->target != 1 && + instance->target != THERMAL_NO_TARGET) + pr_debug("Unexpected state %ld of thermal instance %s in bang-bang\n", + instance->target, instance->name); + + /* + * Enable the fan when the trip is crossed on the way up and disable it + * when the trip is crossed on the way down. + */ + instance->target = target; + instance->initialized = true; + + dev_dbg(&instance->cdev->device, "target=%ld\n", instance->target); + + mutex_lock(&instance->cdev->lock); + __thermal_cdev_update(instance->cdev); + mutex_unlock(&instance->cdev->lock); +} + /** * bang_bang_control - controls devices associated with the given zone * @tz: thermal_zone_device @@ -54,33 +76,60 @@ static void bang_bang_control(struct thermal_zone_device *tz, tz->temperature, trip->hysteresis); list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip) - continue; + if (instance->trip == trip) + bang_bang_set_instance_target(instance, crossed_up); + } +} + +static void bang_bang_manage(struct thermal_zone_device *tz) +{ + const struct thermal_trip_desc *td; + struct thermal_instance *instance; - if (instance->target != 0 && instance->target != 1 && - instance->target != THERMAL_NO_TARGET) - pr_debug("Unexpected state %ld of thermal instance %s in bang-bang\n", - instance->target, instance->name); + /* If the code below has run already, nothing needs to be done. */ + if (tz->governor_data) + return; - /* - * Enable the fan when the trip is crossed on the way up and - * disable it when the trip is crossed on the way down. - */ - instance->target = crossed_up; + for_each_trip_desc(tz, td) { + const struct thermal_trip *trip = &td->trip; - dev_dbg(&instance->cdev->device, "target=%ld\n", instance->target); + if (tz->temperature >= td->threshold || + trip->temperature == THERMAL_TEMP_INVALID || + trip->type == THERMAL_TRIP_CRITICAL || + trip->type == THERMAL_TRIP_HOT) + continue; - mutex_lock(&instance->cdev->lock); - instance->cdev->updated = false; /* cdev needs update */ - mutex_unlock(&instance->cdev->lock); + /* + * If the initial cooling device state is "on", but the zone + * temperature is not above the trip point, the core will not + * call bang_bang_control() until the zone temperature reaches + * the trip point temperature which may be never. In those + * cases, set the initial state of the cooling device to 0. + */ + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + if (!instance->initialized && instance->trip == trip) + bang_bang_set_instance_target(instance, 0); + } } - list_for_each_entry(instance, &tz->thermal_instances, tz_node) - thermal_cdev_update(instance->cdev); + tz->governor_data = (void *)true; +} + +static void bang_bang_update_tz(struct thermal_zone_device *tz, + enum thermal_notify_event reason) +{ + /* + * Let bang_bang_manage() know that it needs to walk trips after binding + * a new cdev and after system resume. + */ + if (reason == THERMAL_TZ_BIND_CDEV || reason == THERMAL_TZ_RESUME) + tz->governor_data = NULL; } static struct thermal_governor thermal_gov_bang_bang = { .name = "bang_bang", .trip_crossed = bang_bang_control, + .manage = bang_bang_manage, + .update_tz = bang_bang_update_tz, }; THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang); diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 95c399f94744..e6669aeda1ff 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1728,7 +1728,8 @@ static void thermal_zone_device_resume(struct work_struct *work) thermal_debug_tz_resume(tz); thermal_zone_device_init(tz); - __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); + thermal_governor_update_tz(tz, THERMAL_TZ_RESUME); + __thermal_zone_device_update(tz, THERMAL_TZ_RESUME); complete(&tz->resume); tz->resuming = false; diff --git a/drivers/thunderbolt/debugfs.c b/drivers/thunderbolt/debugfs.c index 11185cc1db92..9ed4bb2e8d05 100644 --- a/drivers/thunderbolt/debugfs.c +++ b/drivers/thunderbolt/debugfs.c @@ -323,16 +323,17 @@ static ssize_t port_sb_regs_write(struct file *file, const char __user *user_buf if (mutex_lock_interruptible(&tb->lock)) { ret = -ERESTARTSYS; - goto out_rpm_put; + goto out; } ret = sb_regs_write(port, port_sb_regs, ARRAY_SIZE(port_sb_regs), USB4_SB_TARGET_ROUTER, 0, buf, count, ppos); mutex_unlock(&tb->lock); -out_rpm_put: +out: pm_runtime_mark_last_busy(&sw->dev); pm_runtime_put_autosuspend(&sw->dev); + free_page((unsigned long)buf); return ret < 0 ? ret : count; } @@ -355,16 +356,17 @@ static ssize_t retimer_sb_regs_write(struct file *file, if (mutex_lock_interruptible(&tb->lock)) { ret = -ERESTARTSYS; - goto out_rpm_put; + goto out; } ret = sb_regs_write(rt->port, retimer_sb_regs, ARRAY_SIZE(retimer_sb_regs), USB4_SB_TARGET_RETIMER, rt->index, buf, count, ppos); mutex_unlock(&tb->lock); -out_rpm_put: +out: pm_runtime_mark_last_busy(&rt->dev); pm_runtime_put_autosuspend(&rt->dev); + free_page((unsigned long)buf); return ret < 0 ? ret : count; } diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 326433df5880..6a2116cbb06f 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -3392,6 +3392,7 @@ void tb_switch_remove(struct tb_switch *sw) tb_switch_remove(port->remote->sw); port->remote = NULL; } else if (port->xdomain) { + port->xdomain->is_unplugged = true; tb_xdomain_remove(port->xdomain); port->xdomain = NULL; } diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 1af9aed99c65..afef1dd4ddf4 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -27,7 +27,6 @@ #include <linux/pm_wakeirq.h> #include <linux/dma-mapping.h> #include <linux/sys_soc.h> -#include <linux/pm_domain.h> #include "8250.h" @@ -119,12 +118,6 @@ #define UART_OMAP_TO_L 0x26 #define UART_OMAP_TO_H 0x27 -/* - * Copy of the genpd flags for the console. - * Only used if console suspend is disabled - */ -static unsigned int genpd_flags_console; - struct omap8250_priv { void __iomem *membase; int line; @@ -1655,7 +1648,6 @@ static int omap8250_suspend(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); - struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain); int err = 0; serial8250_suspend_port(priv->line); @@ -1666,19 +1658,8 @@ static int omap8250_suspend(struct device *dev) if (!device_may_wakeup(dev)) priv->wer = 0; serial_out(up, UART_OMAP_WER, priv->wer); - if (uart_console(&up->port)) { - if (console_suspend_enabled) - err = pm_runtime_force_suspend(dev); - else { - /* - * The pd shall not be powered-off (no console suspend). - * Make copy of genpd flags before to set it always on. - * The original value is restored during the resume. - */ - genpd_flags_console = genpd->flags; - genpd->flags |= GENPD_FLAG_ALWAYS_ON; - } - } + if (uart_console(&up->port) && console_suspend_enabled) + err = pm_runtime_force_suspend(dev); flush_work(&priv->qos_work); return err; @@ -1688,16 +1669,12 @@ static int omap8250_resume(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); - struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain); int err; if (uart_console(&up->port) && console_suspend_enabled) { - if (console_suspend_enabled) { - err = pm_runtime_force_resume(dev); - if (err) - return err; - } else - genpd->flags = genpd_flags_console; + err = pm_runtime_force_resume(dev); + if (err) + return err; } serial8250_resume_port(priv->line); diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 0a90964d6d10..09b246c9e389 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -2514,7 +2514,7 @@ static const struct uart_ops atmel_pops = { }; static const struct serial_rs485 atmel_rs485_supported = { - .flags = SER_RS485_ENABLED | SER_RS485_RTS_AFTER_SEND | SER_RS485_RX_DURING_TX, + .flags = SER_RS485_ENABLED | SER_RS485_RTS_ON_SEND | SER_RS485_RX_DURING_TX, .delay_rts_before_send = 1, .delay_rts_after_send = 1, }; diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 615291ea9b5e..77efa7ee6eda 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2923,6 +2923,7 @@ static int lpuart_probe(struct platform_device *pdev) pm_runtime_set_autosuspend_delay(&pdev->dev, UART_AUTOSUSPEND_TIMEOUT); pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); + pm_runtime_mark_last_busy(&pdev->dev); ret = lpuart_global_reset(sport); if (ret) diff --git a/drivers/tty/vt/conmakehash.c b/drivers/tty/vt/conmakehash.c index 82d9db68b2ce..a931fcde7ad9 100644 --- a/drivers/tty/vt/conmakehash.c +++ b/drivers/tty/vt/conmakehash.c @@ -11,8 +11,6 @@ * Copyright (C) 1995-1997 H. Peter Anvin */ -#include <libgen.h> -#include <linux/limits.h> #include <stdio.h> #include <stdlib.h> #include <sysexits.h> @@ -79,7 +77,6 @@ int main(int argc, char *argv[]) { FILE *ctbl; const char *tblname; - char base_tblname[PATH_MAX]; char buffer[65536]; int fontlen; int i, nuni, nent; @@ -245,20 +242,15 @@ int main(int argc, char *argv[]) for ( i = 0 ; i < fontlen ; i++ ) nuni += unicount[i]; - strncpy(base_tblname, tblname, PATH_MAX); - base_tblname[PATH_MAX - 1] = 0; printf("\ /*\n\ - * Do not edit this file; it was automatically generated by\n\ - *\n\ - * conmakehash %s > [this file]\n\ - *\n\ + * Automatically generated file; Do not edit.\n\ */\n\ \n\ #include <linux/types.h>\n\ \n\ u8 dfont_unicount[%d] = \n\ -{\n\t", basename(base_tblname), fontlen); +{\n\t", fontlen); for ( i = 0 ; i < fontlen ; i++ ) { diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index d7654f475daf..937ce5fd5809 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1872,7 +1872,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) cancel_delayed_work_sync(&xhci->cmd_timer); - for (i = 0; i < xhci->max_interrupters; i++) { + for (i = 0; xhci->interrupters && i < xhci->max_interrupters; i++) { if (xhci->interrupters[i]) { xhci_remove_interrupter(xhci, xhci->interrupters[i]); xhci_free_interrupter(xhci, xhci->interrupters[i]); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b7517c3c8059..4ea2c3e072a9 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2910,6 +2910,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, process_isoc_td(xhci, ep, ep_ring, td, ep_trb, event); else process_bulk_intr_td(xhci, ep, ep_ring, td, ep_trb, event); + return 0; check_endpoint_halted: if (xhci_halted_host_endpoint(ep_ctx, trb_comp_code)) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0a8cf6c17f82..efdf4c228b8c 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -2837,7 +2837,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, xhci->num_active_eps); return -ENOMEM; } - if ((xhci->quirks & XHCI_SW_BW_CHECKING) && + if ((xhci->quirks & XHCI_SW_BW_CHECKING) && !ctx_change && xhci_reserve_bandwidth(xhci, virt_dev, command->in_ctx)) { if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) xhci_free_host_resources(xhci, ctrl_ctx); @@ -4200,8 +4200,10 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_unlock(&xhci->mutex); ret = xhci_disable_slot(xhci, udev->slot_id); xhci_free_virt_device(xhci, udev->slot_id); - if (!ret) - xhci_alloc_dev(hcd, udev); + if (!ret) { + if (xhci_alloc_dev(hcd, udev) == 1) + xhci_setup_addressable_virt_dev(xhci, udev); + } kfree(command->completion); kfree(command); return -EPROTO; diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c index 2d30fc1be306..1a8d5e80b9ae 100644 --- a/drivers/usb/misc/usb-ljca.c +++ b/drivers/usb/misc/usb-ljca.c @@ -169,6 +169,7 @@ static const struct acpi_device_id ljca_gpio_hids[] = { { "INTC1096" }, { "INTC100B" }, { "INTC10D1" }, + { "INTC10B5" }, {}, }; diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index cce39818e99a..4b02d6474259 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5655,7 +5655,6 @@ static void run_state_machine(struct tcpm_port *port) break; case PORT_RESET: tcpm_reset_port(port); - port->pd_events = 0; if (port->self_powered) tcpm_set_cc(port, TYPEC_CC_OPEN); else diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 432a2d6266d7..4039851551c1 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -137,7 +137,7 @@ static int ucsi_run_command(struct ucsi *ucsi, u64 command, u32 *cci, if (ret) return ret; - return err; + return err ?: UCSI_CCI_LENGTH(*cci); } static int ucsi_read_error(struct ucsi *ucsi, u8 connector_num) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index d9c5a92fa708..fd3a2522bc3e 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -196,75 +196,71 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) return DIV_ROUND_UP(bytes, sizeof(u64)); } -int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); int ret = 0; /* allow for unknown fields */ - bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err, - alloc_v1_val_size_bad, + bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), + c, alloc_v1_val_size_bad, "incorrect value size (%zu < %u)", bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); fsck_err: return ret; } -int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_alloc_unpacked u; int ret = 0; - bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err, - alloc_v2_unpack_error, + bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), + c, alloc_v2_unpack_error, "unpack error"); fsck_err: return ret; } -int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_alloc_unpacked u; int ret = 0; - bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err, - alloc_v2_unpack_error, + bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), + c, alloc_v2_unpack_error, "unpack error"); fsck_err: return ret; } -int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); int ret = 0; - bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), c, err, - alloc_v4_val_size_bad, + bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), + c, alloc_v4_val_size_bad, "bad val size (%u > %zu)", alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k)); bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && - BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err, - alloc_v4_backpointers_start_bad, + BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), + c, alloc_v4_backpointers_start_bad, "invalid backpointers_start"); - bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err, - alloc_key_data_type_bad, + bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, + c, alloc_key_data_type_bad, "invalid data type (got %u should be %u)", a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); for (unsigned i = 0; i < 2; i++) bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX, - c, err, - alloc_key_io_time_bad, + c, alloc_key_io_time_bad, "invalid io_time[%s]: %llu, max %llu", i == READ ? "read" : "write", a.v->io_time[i], LRU_TIME_MAX); @@ -282,7 +278,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, a.v->dirty_sectors || a.v->cached_sectors || a.v->stripe, - c, err, alloc_key_empty_but_have_data, + c, alloc_key_empty_but_have_data, "empty data type free but have data %u.%u.%u %u", stripe_sectors, a.v->dirty_sectors, @@ -296,7 +292,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_DATA_parity: bkey_fsck_err_on(!a.v->dirty_sectors && !stripe_sectors, - c, err, alloc_key_dirty_sectors_0, + c, alloc_key_dirty_sectors_0, "data_type %s but dirty_sectors==0", bch2_data_type_str(a.v->data_type)); break; @@ -305,12 +301,12 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, a.v->dirty_sectors || stripe_sectors || a.v->stripe, - c, err, alloc_key_cached_inconsistency, + c, alloc_key_cached_inconsistency, "data type inconsistency"); bkey_fsck_err_on(!a.v->io_time[READ] && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, - c, err, alloc_key_cached_but_read_time_zero, + c, alloc_key_cached_but_read_time_zero, "cached bucket with read_time == 0"); break; case BCH_DATA_stripe: @@ -513,14 +509,13 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) : 0; } -int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err, - bucket_gens_val_size_bad, + bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), + c, bucket_gens_val_size_bad, "bad val size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); fsck_err: @@ -829,7 +824,19 @@ int bch2_trigger_alloc(struct btree_trans *trans, struct bch_alloc_v4 old_a_convert; const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert); - struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v; + + struct bch_alloc_v4 *new_a; + if (likely(new.k->type == KEY_TYPE_alloc_v4)) { + new_a = bkey_s_to_alloc_v4(new).v; + } else { + BUG_ON(!(flags & BTREE_TRIGGER_gc)); + + struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c); + ret = PTR_ERR_OR_ZERO(new_ka); + if (unlikely(ret)) + goto err; + new_a = &new_ka->v; + } if (flags & BTREE_TRIGGER_transactional) { alloc_data_type_set(new_a, new_a->data_type); diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 96a0444ea78f..fd790b03fbe1 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -150,7 +150,9 @@ static inline void alloc_data_type_set(struct bch_alloc_v4 *a, enum bch_data_typ static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) { - return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; + return a.data_type == BCH_DATA_cached + ? a.io_time[READ] & LRU_TIME_MAX + : 0; } #define DATA_TYPES_MOVABLE \ @@ -240,52 +242,48 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); -int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_alloc_v4_swab(struct bkey_s); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_alloc ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v1_invalid, \ + .key_validate = bch2_alloc_v1_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v2_invalid, \ + .key_validate = bch2_alloc_v2_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v3_invalid, \ + .key_validate = bch2_alloc_v3_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_alloc_v4 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v4_invalid, \ + .key_validate = bch2_alloc_v4_validate, \ .val_to_text = bch2_alloc_to_text, \ .swab = bch2_alloc_v4_swab, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 48, \ }) -int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_bucket_gens_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \ - .key_invalid = bch2_bucket_gens_invalid, \ + .key_validate = bch2_bucket_gens_validate, \ .val_to_text = bch2_bucket_gens_to_text, \ }) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 3cc02479a982..d4da6343efa9 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -47,9 +47,8 @@ static bool extent_matches_bp(struct bch_fs *c, return false; } -int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); @@ -68,8 +67,7 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || !bpos_eq(bp.k->p, bp_pos), - c, err, - backpointer_bucket_offset_wrong, + c, backpointer_bucket_offset_wrong, "backpointer bucket_offset wrong"); fsck_err: return ret; @@ -763,27 +761,22 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, btree < BTREE_ID_NR && !ret; btree++) { unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1; - struct btree_iter iter; - struct btree *b; if (!(BIT_ULL(btree) & btree_leaf_mask) && !(BIT_ULL(btree) & btree_interior_mask)) continue; - bch2_trans_begin(trans); - - __for_each_btree_node(trans, iter, btree, + ret = __for_each_btree_node(trans, iter, btree, btree == start.btree ? start.pos : POS_MIN, - 0, depth, BTREE_ITER_prefetch, b, ret) { + 0, depth, BTREE_ITER_prefetch, b, ({ mem_may_pin -= btree_buf_bytes(b); if (mem_may_pin <= 0) { c->btree_cache.pinned_nodes_end = *end = BBPOS(btree, b->key.k.p); - bch2_trans_iter_exit(trans, &iter); - return 0; + break; } - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); } return ret; diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 6021de1c5e98..7daecadb764e 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -18,14 +18,13 @@ static inline u64 swab40(u64 x) ((x & 0xff00000000ULL) >> 32)); } -int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k, - enum bch_validate_flags, struct printbuf *); +int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags); void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_backpointer_swab(struct bkey_s); #define bch2_bkey_ops_backpointer ((struct bkey_ops) { \ - .key_invalid = bch2_backpointer_invalid, \ + .key_validate = bch2_backpointer_validate, \ .val_to_text = bch2_backpointer_k_to_text, \ .swab = bch2_backpointer_swab, \ .min_val_size = 32, \ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index eedf2d6045e7..0c7086e00d18 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -447,6 +447,7 @@ BCH_DEBUG_PARAMS_DEBUG() x(blocked_journal_low_on_space) \ x(blocked_journal_low_on_pin) \ x(blocked_journal_max_in_flight) \ + x(blocked_key_cache_flush) \ x(blocked_allocate) \ x(blocked_allocate_open_bucket) \ x(blocked_write_buffer_full) \ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index b25f86356728..c75f2e0f32bb 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -676,7 +676,8 @@ struct bch_sb_field_ext { x(mi_btree_bitmap, BCH_VERSION(1, 7)) \ x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ x(disk_accounting_v2, BCH_VERSION(1, 9)) \ - x(disk_accounting_v3, BCH_VERSION(1, 10)) + x(disk_accounting_v3, BCH_VERSION(1, 10)) \ + x(disk_accounting_inum, BCH_VERSION(1, 11)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 936357149cf0..e34cb2bf329c 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -10,9 +10,10 @@ #include "vstructs.h" enum bch_validate_flags { - BCH_VALIDATE_write = (1U << 0), - BCH_VALIDATE_commit = (1U << 1), - BCH_VALIDATE_journal = (1U << 2), + BCH_VALIDATE_write = BIT(0), + BCH_VALIDATE_commit = BIT(1), + BCH_VALIDATE_journal = BIT(2), + BCH_VALIDATE_silent = BIT(3), }; #if 0 diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 5f07cf853d0c..88d8958281e8 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -27,27 +27,27 @@ const char * const bch2_bkey_types[] = { NULL }; -static int deleted_key_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } #define bch2_bkey_ops_deleted ((struct bkey_ops) { \ - .key_invalid = deleted_key_invalid, \ + .key_validate = deleted_key_validate, \ }) #define bch2_bkey_ops_whiteout ((struct bkey_ops) { \ - .key_invalid = deleted_key_invalid, \ + .key_validate = deleted_key_validate, \ }) -static int empty_val_key_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k), c, err, - bkey_val_size_nonzero, + bkey_fsck_err_on(bkey_val_bytes(k.k), + c, bkey_val_size_nonzero, "incorrect value size (%zu != 0)", bkey_val_bytes(k.k)); fsck_err: @@ -55,11 +55,11 @@ fsck_err: } #define bch2_bkey_ops_error ((struct bkey_ops) { \ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ }) -static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int key_type_cookie_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } @@ -73,17 +73,17 @@ static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c, } #define bch2_bkey_ops_cookie ((struct bkey_ops) { \ - .key_invalid = key_type_cookie_invalid, \ + .key_validate = key_type_cookie_validate, \ .val_to_text = key_type_cookie_to_text, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_hash_whiteout ((struct bkey_ops) {\ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ }) -static int key_type_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int key_type_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } @@ -98,9 +98,9 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c, datalen, min(datalen, 32U), d.v->data); } -#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \ - .key_invalid = key_type_inline_data_invalid, \ - .val_to_text = key_type_inline_data_to_text, \ +#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \ + .key_validate = key_type_inline_data_validate, \ + .val_to_text = key_type_inline_data_to_text, \ }) static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) @@ -110,7 +110,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_ } #define bch2_bkey_ops_set ((struct bkey_ops) { \ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ .key_merge = key_type_set_merge, \ }) @@ -123,9 +123,8 @@ const struct bkey_ops bch2_bkey_ops[] = { const struct bkey_ops bch2_bkey_null_ops = { }; -int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) return 0; @@ -133,15 +132,15 @@ int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type); int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, c, err, - bkey_val_size_too_small, + bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, + c, bkey_val_size_too_small, "bad val size (%zu < %u)", bkey_val_bytes(k.k), ops->min_val_size); - if (!ops->key_invalid) + if (!ops->key_validate) return 0; - ret = ops->key_invalid(c, k, flags, err); + ret = ops->key_validate(c, k, flags); fsck_err: return ret; } @@ -161,18 +160,17 @@ const char *bch2_btree_node_type_str(enum btree_node_type type) return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1); } -int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, - enum btree_node_type type, - enum bch_validate_flags flags, - struct printbuf *err) +int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, + enum btree_node_type type, + enum bch_validate_flags flags) { if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) return 0; int ret = 0; - bkey_fsck_err_on(k.k->u64s < BKEY_U64s, c, err, - bkey_u64s_too_small, + bkey_fsck_err_on(k.k->u64s < BKEY_U64s, + c, bkey_u64s_too_small, "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s); if (type >= BKEY_TYPE_NR) @@ -180,8 +178,8 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && (type == BKEY_TYPE_btree || (flags & BCH_VALIDATE_commit)) && - !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, - bkey_invalid_type_for_btree, + !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), + c, bkey_invalid_type_for_btree, "invalid key type for btree %s (%s)", bch2_btree_node_type_str(type), k.k->type < KEY_TYPE_MAX @@ -189,17 +187,17 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, : "(unknown)"); if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { - bkey_fsck_err_on(k.k->size == 0, c, err, - bkey_extent_size_zero, + bkey_fsck_err_on(k.k->size == 0, + c, bkey_extent_size_zero, "size == 0"); - bkey_fsck_err_on(k.k->size > k.k->p.offset, c, err, - bkey_extent_size_greater_than_offset, + bkey_fsck_err_on(k.k->size > k.k->p.offset, + c, bkey_extent_size_greater_than_offset, "size greater than offset (%u > %llu)", k.k->size, k.k->p.offset); } else { - bkey_fsck_err_on(k.k->size, c, err, - bkey_size_nonzero, + bkey_fsck_err_on(k.k->size, + c, bkey_size_nonzero, "size != 0"); } @@ -207,12 +205,12 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, enum btree_id btree = type - 1; if (btree_type_has_snapshots(btree)) { - bkey_fsck_err_on(!k.k->p.snapshot, c, err, - bkey_snapshot_zero, + bkey_fsck_err_on(!k.k->p.snapshot, + c, bkey_snapshot_zero, "snapshot == 0"); } else if (!btree_type_has_snapshot_field(btree)) { - bkey_fsck_err_on(k.k->p.snapshot, c, err, - bkey_snapshot_nonzero, + bkey_fsck_err_on(k.k->p.snapshot, + c, bkey_snapshot_nonzero, "nonzero snapshot"); } else { /* @@ -221,34 +219,33 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, */ } - bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err, - bkey_at_pos_max, + bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), + c, bkey_at_pos_max, "key at POS_MAX"); } fsck_err: return ret; } -int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, +int bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, enum btree_node_type type, - enum bch_validate_flags flags, - struct printbuf *err) + enum bch_validate_flags flags) { - return __bch2_bkey_invalid(c, k, type, flags, err) ?: - bch2_bkey_val_invalid(c, k, flags, err); + return __bch2_bkey_validate(c, k, type, flags) ?: + bch2_bkey_val_validate(c, k, flags); } int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, struct printbuf *err) + struct bkey_s_c k, enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), c, err, - bkey_before_start_of_btree_node, + bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), + c, bkey_before_start_of_btree_node, "key before start of btree node"); - bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), c, err, - bkey_after_end_of_btree_node, + bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), + c, bkey_after_end_of_btree_node, "key past end of btree node"); fsck_err: return ret; diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index baef0722f5fb..3df3dd2723a1 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -14,15 +14,15 @@ extern const char * const bch2_bkey_types[]; extern const struct bkey_ops bch2_bkey_null_ops; /* - * key_invalid: checks validity of @k, returns 0 if good or -EINVAL if bad. If + * key_validate: checks validity of @k, returns 0 if good or -EINVAL if bad. If * invalid, entire key will be deleted. * * When invalid, error string is returned via @err. @rw indicates whether key is * being read or written; more aggressive checks can be enabled when rw == WRITE. */ struct bkey_ops { - int (*key_invalid)(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err); + int (*key_validate)(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags); void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); void (*swab)(struct bkey_s); @@ -48,14 +48,13 @@ static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type) : &bch2_bkey_null_ops; } -int bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, - enum bch_validate_flags, struct printbuf *); -int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, - enum bch_validate_flags, struct printbuf *); -int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, - struct bkey_s_c, struct printbuf *); +int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, + enum bch_validate_flags); +int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, + enum bch_validate_flags); +int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, struct bkey_s_c, + enum bch_validate_flags); void bch2_bpos_to_text(struct printbuf *, struct bpos); void bch2_bkey_to_text(struct printbuf *, const struct bkey *); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 6cbf2aa6a947..eb3002c4eae7 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -741,12 +741,9 @@ fsck_err: static int bch2_mark_superblocks(struct bch_fs *c) { - mutex_lock(&c->sb_lock); gc_pos_set(c, gc_phase(GC_PHASE_sb)); - int ret = bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); - mutex_unlock(&c->sb_lock); - return ret; + return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); } static void bch2_gc_free(struct bch_fs *c) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 2c424435ca4a..56ea9a77cd4a 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -836,14 +836,13 @@ fsck_err: return ret; } -static int bset_key_invalid(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, - bool updated_range, int rw, - struct printbuf *err) +static int bset_key_validate(struct bch_fs *c, struct btree *b, + struct bkey_s_c k, + bool updated_range, int rw) { - return __bch2_bkey_invalid(c, k, btree_node_type(b), READ, err) ?: - (!updated_range ? bch2_bkey_in_btree_node(c, b, k, err) : 0) ?: - (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0); + return __bch2_bkey_validate(c, k, btree_node_type(b), 0) ?: + (!updated_range ? bch2_bkey_in_btree_node(c, b, k, 0) : 0) ?: + (rw == WRITE ? bch2_bkey_val_validate(c, k, 0) : 0); } static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, @@ -858,12 +857,9 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, if (!bkeyp_u64s_valid(&b->format, k)) return false; - struct printbuf buf = PRINTBUF; struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); - bool ret = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b), READ, &buf); - printbuf_exit(&buf); - return ret; + return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent); } static int validate_bset_keys(struct bch_fs *c, struct btree *b, @@ -915,19 +911,11 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, u = __bkey_disassemble(b, k, &tmp); - printbuf_reset(&buf); - if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) { - printbuf_reset(&buf); - bset_key_invalid(c, b, u.s_c, updated_range, write, &buf); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, u.s_c); - - btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, k, - btree_node_bad_bkey, - "invalid bkey: %s", buf.buf); + ret = bset_key_validate(c, b, u.s_c, updated_range, write); + if (ret == -BCH_ERR_fsck_delete_bkey) goto drop_this_key; - } + if (ret) + goto fsck_err; if (write) bch2_bkey_compat(b->c.level, b->c.btree_id, version, @@ -1228,23 +1216,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); - printbuf_reset(&buf); - - if (bch2_bkey_val_invalid(c, u.s_c, READ, &buf) || + ret = bch2_bkey_val_validate(c, u.s_c, READ); + if (ret == -BCH_ERR_fsck_delete_bkey || (bch2_inject_invalid_keys && !bversion_cmp(u.k->version, MAX_VERSION))) { - printbuf_reset(&buf); - - prt_printf(&buf, "invalid bkey: "); - bch2_bkey_val_invalid(c, u.s_c, READ, &buf); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, u.s_c); - - btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, k, - btree_node_bad_bkey, - "%s", buf.buf); - btree_keys_account_key_drop(&b->nr, 0, k); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); @@ -1253,6 +1228,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, set_btree_bset_end(b, b->set); continue; } + if (ret) + goto fsck_err; if (u.k->type == KEY_TYPE_btree_ptr_v2) { struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u); @@ -1767,6 +1744,8 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, set_btree_node_read_in_flight(b); + /* we can't pass the trans to read_done() for fsck errors, so it must be unlocked */ + bch2_trans_unlock(trans); bch2_btree_node_read(trans, b, true); if (btree_node_read_error(b)) { @@ -1952,18 +1931,14 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - struct printbuf buf = PRINTBUF; bool saw_error; - int ret; - - ret = bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), - BKEY_TYPE_btree, WRITE, &buf); - if (ret) - bch2_fs_inconsistent(c, "invalid btree node key before write: %s", buf.buf); - printbuf_exit(&buf); - if (ret) + int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), + BKEY_TYPE_btree, WRITE); + if (ret) { + bch2_fs_inconsistent(c, "invalid btree node key before write"); return ret; + } ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?: validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index aa8a049071f4..2e84d22e17bd 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1900,6 +1900,7 @@ err: goto out; } +/* Only kept for -tools */ struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter) { struct btree *b; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index c7725865309c..dca62375d7d3 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -600,23 +600,35 @@ void bch2_trans_srcu_unlock(struct btree_trans *); u32 bch2_trans_begin(struct btree_trans *); -/* - * XXX - * this does not handle transaction restarts from bch2_btree_iter_next_node() - * correctly - */ -#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - _locks_want, _depth, _flags, _b, _ret) \ - for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \ - _start, _locks_want, _depth, _flags); \ - (_b) = bch2_btree_iter_peek_node_and_restart(&(_iter)), \ - !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b); \ - (_b) = bch2_btree_iter_next_node(&(_iter))) +#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + _locks_want, _depth, _flags, _b, _do) \ +({ \ + bch2_trans_begin((_trans)); \ + \ + struct btree_iter _iter; \ + bch2_trans_node_iter_init((_trans), &_iter, (_btree_id), \ + _start, _locks_want, _depth, _flags); \ + int _ret3 = 0; \ + do { \ + _ret3 = lockrestart_do((_trans), ({ \ + struct btree *_b = bch2_btree_iter_peek_node(&_iter); \ + if (!_b) \ + break; \ + \ + PTR_ERR_OR_ZERO(_b) ?: (_do); \ + })) ?: \ + lockrestart_do((_trans), \ + PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(&_iter))); \ + } while (!_ret3); \ + \ + bch2_trans_iter_exit((_trans), &(_iter)); \ + _ret3; \ +}) #define for_each_btree_node(_trans, _iter, _btree_id, _start, \ - _flags, _b, _ret) \ - __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - 0, 0, _flags, _b, _ret) + _flags, _b, _do) \ + __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + 0, 0, _flags, _b, _do) static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter, unsigned flags) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index f2f2e525460b..79954490627c 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -497,11 +497,6 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path path->l[1].b = NULL; - if (bch2_btree_node_relock_notrace(trans, path, 0)) { - path->uptodate = BTREE_ITER_UPTODATE; - return 0; - } - int ret; do { ret = btree_path_traverse_cached_fast(trans, path); diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h index e6b2cd0dd2c1..51d6289b8dee 100644 --- a/fs/bcachefs/btree_key_cache.h +++ b/fs/bcachefs/btree_key_cache.h @@ -11,13 +11,27 @@ static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c) return max_t(ssize_t, 0, nr_dirty - max_dirty); } -static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) +static inline ssize_t __bch2_btree_key_cache_must_wait(struct bch_fs *c) { size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); size_t max_dirty = 4096 + (nr_keys * 3) / 4; - return nr_dirty > max_dirty; + return nr_dirty - max_dirty; +} + +static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) +{ + return __bch2_btree_key_cache_must_wait(c) > 0; +} + +static inline bool bch2_btree_key_cache_wait_done(struct bch_fs *c) +{ + size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); + size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); + size_t max_dirty = 2048 + (nr_keys * 5) / 8; + + return nr_dirty <= max_dirty; } int bch2_btree_key_cache_journal_flush(struct journal *, diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 001107226377..b28c649c6838 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -530,7 +530,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); printbuf_exit(&buf); - BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL)); + BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0)); ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k); if (ret) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index cca336fe46e9..a0101d9c5d83 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -712,7 +712,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, a->k.version = journal_pos_to_bversion(&trans->journal_res, (u64 *) entry - (u64 *) trans->journal_entries); BUG_ON(bversion_zero(a->k.version)); - ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false); + ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false, false); if (ret) goto revert_fs_usage; } @@ -798,7 +798,7 @@ revert_fs_usage: struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); bch2_accounting_neg(a); - bch2_accounting_mem_mod_locked(trans, a.c, false); + bch2_accounting_mem_mod_locked(trans, a.c, false, false); bch2_accounting_neg(a); } percpu_up_read(&c->mark_lock); @@ -818,50 +818,6 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p); } -static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, - enum bch_validate_flags flags, - struct btree_insert_entry *i, - struct printbuf *err) -{ - struct bch_fs *c = trans->c; - - printbuf_reset(err); - prt_printf(err, "invalid bkey on insert from %s -> %ps\n", - trans->fn, (void *) i->ip_allocated); - printbuf_indent_add(err, 2); - - bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k)); - prt_newline(err); - - bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err); - bch2_print_string_as_lines(KERN_ERR, err->buf); - - bch2_inconsistent_error(c); - bch2_dump_trans_updates(trans); - - return -EINVAL; -} - -static noinline int bch2_trans_commit_journal_entry_invalid(struct btree_trans *trans, - struct jset_entry *i) -{ - struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; - - prt_printf(&buf, "invalid bkey on insert from %s\n", trans->fn); - printbuf_indent_add(&buf, 2); - - bch2_journal_entry_to_text(&buf, c, i); - prt_newline(&buf); - - bch2_print_string_as_lines(KERN_ERR, buf.buf); - - bch2_inconsistent_error(c); - bch2_dump_trans_updates(trans); - - return -EINVAL; -} - static int bch2_trans_commit_journal_pin_flush(struct journal *j, struct journal_entry_pin *_pin, u64 seq) { @@ -927,7 +883,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags static int journal_reclaim_wait_done(struct bch_fs *c) { int ret = bch2_journal_error(&c->journal) ?: - !bch2_btree_key_cache_must_wait(c); + bch2_btree_key_cache_wait_done(c); if (!ret) journal_reclaim_kick(&c->journal); @@ -973,9 +929,13 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, bch2_trans_unlock(trans); trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip); + track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], true); wait_event_freezable(c->journal.reclaim_wait, (ret = journal_reclaim_wait_done(c))); + + track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], false); + if (ret < 0) break; @@ -1060,20 +1020,19 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto out_reset; trans_for_each_update(trans, i) { - struct printbuf buf = PRINTBUF; enum bch_validate_flags invalid_flags = 0; if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), - i->bkey_type, invalid_flags, &buf))) - ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf); - btree_insert_entry_checks(trans, i); - printbuf_exit(&buf); - - if (ret) + ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), + i->bkey_type, invalid_flags); + if (unlikely(ret)){ + bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", + trans->fn, (void *) i->ip_allocated); return ret; + } + btree_insert_entry_checks(trans, i); } for (struct jset_entry *i = trans->journal_entries; @@ -1084,13 +1043,14 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - if (unlikely(bch2_journal_entry_validate(c, NULL, i, - bcachefs_metadata_version_current, - CPU_BIG_ENDIAN, invalid_flags))) - ret = bch2_trans_commit_journal_entry_invalid(trans, i); - - if (ret) + ret = bch2_journal_entry_validate(c, NULL, i, + bcachefs_metadata_version_current, + CPU_BIG_ENDIAN, invalid_flags); + if (unlikely(ret)) { + bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n", + trans->fn); return ret; + } } if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index e61f9695771e..b3454d4619e8 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1364,18 +1364,10 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, if (unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags))) bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p); - if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - btree_node_type(b), WRITE, &buf) ?: - bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) { - printbuf_reset(&buf); - prt_printf(&buf, "inserting invalid bkey\n "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); - prt_printf(&buf, "\n "); - bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - btree_node_type(b), WRITE, &buf); - bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf); - - bch2_fs_inconsistent(c, "%s", buf.buf); + if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), + btree_node_type(b), BCH_VALIDATE_write) ?: + bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), BCH_VALIDATE_write)) { + bch2_fs_inconsistent(c, "%s: inserting invalid bkey", __func__); dump_stack(); } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 9f7004e941ce..be2bbd248631 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -810,6 +810,20 @@ static int __trigger_extent(struct btree_trans *trans, ret = bch2_disk_accounting_mod(trans, &acc_btree_key, &replicas_sectors, 1, gc); if (ret) return ret; + } else { + bool insert = !(flags & BTREE_TRIGGER_overwrite); + struct disk_accounting_pos acc_inum_key = { + .type = BCH_DISK_ACCOUNTING_inum, + .inum.inum = k.k->p.inode, + }; + s64 v[3] = { + insert ? 1 : -1, + insert ? k.k->size : -((s64) k.k->size), + replicas_sectors, + }; + ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc); + if (ret) + return ret; } if (bch2_bkey_rebalance_opts(k)) { @@ -901,7 +915,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, enum bch_data_type type, unsigned sectors) { - struct bch_fs *c = trans->c; struct btree_iter iter; int ret = 0; @@ -911,7 +924,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, return PTR_ERR(a); if (a->v.data_type && type && a->v.data_type != type) { - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, bucket_metadata_type_mismatch, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", @@ -1032,13 +1045,18 @@ static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans, static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *ca, enum btree_iter_update_trigger_flags flags) { - struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; + struct bch_fs *c = trans->c; + + mutex_lock(&c->sb_lock); + struct bch_sb_layout layout = ca->disk_sb.sb->layout; + mutex_unlock(&c->sb_lock); + u64 bucket = 0; unsigned i, bucket_sectors = 0; int ret; - for (i = 0; i < layout->nr_superblocks; i++) { - u64 offset = le64_to_cpu(layout->sb_offset[i]); + for (i = 0; i < layout.nr_superblocks; i++) { + u64 offset = le64_to_cpu(layout.sb_offset[i]); if (offset == BCH_SB_SECTOR) { ret = bch2_trans_mark_metadata_sectors(trans, ca, @@ -1049,7 +1067,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c } ret = bch2_trans_mark_metadata_sectors(trans, ca, offset, - offset + (1 << layout->sb_max_size_bits), + offset + (1 << layout.sb_max_size_bits), BCH_DATA_sb, &bucket, &bucket_sectors, flags); if (ret) return ret; diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index ec1b636ef78d..f70eb2127d32 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -93,7 +93,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, .dev_bucket = (u64) dev << 56 | bucket, .journal_seq = journal_seq, }; - size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0; + size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0, nr_rehashes_this_size = 0; int ret = 0; mutex_lock(&b->lock); @@ -106,7 +106,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, for (i = 0; i < size; i++) nr_elements += t->d[i].journal_seq > flushed_seq; - new_bits = t->bits + (nr_elements * 3 > size); + new_bits = ilog2(roundup_pow_of_two(nr_elements * 3)); n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL); if (!n) { @@ -115,7 +115,14 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, } retry_rehash: + if (nr_rehashes_this_size == 3) { + new_bits++; + nr_rehashes_this_size = 0; + } + nr_rehashes++; + nr_rehashes_this_size++; + bucket_table_init(n, new_bits); tmp = new; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 0087b8555ead..6a854c918496 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -250,10 +250,8 @@ restart_drop_extra_replicas: * it's been hard to reproduce, so this should give us some more * information when it does occur: */ - struct printbuf err = PRINTBUF; - int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err); - printbuf_exit(&err); - + int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), + BCH_VALIDATE_commit); if (invalid) { struct printbuf buf = PRINTBUF; diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index ebabab171fe5..45aec1afdb0e 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -397,47 +397,27 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans *trans; - struct btree_iter iter; - struct btree *b; - ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - ret = flush_buf(i); + ssize_t ret = flush_buf(i); if (ret) return ret; if (bpos_eq(SPOS_MAX, i->from)) return i->ret; - trans = bch2_trans_get(i->c); -retry: - bch2_trans_begin(trans); - - for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) { - bch2_btree_node_to_text(&i->buf, i->c, b); - i->from = !bpos_eq(SPOS_MAX, b->key.k.p) - ? bpos_successor(b->key.k.p) - : b->key.k.p; - - ret = drop_locks_do(trans, flush_buf(i)); - if (ret) - break; - } - bch2_trans_iter_exit(trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - - bch2_trans_put(trans); - - if (!ret) - ret = flush_buf(i); + return bch2_trans_run(i->c, + for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({ + bch2_btree_node_to_text(&i->buf, i->c, b); + i->from = !bpos_eq(SPOS_MAX, b->key.k.p) + ? bpos_successor(b->key.k.p) + : b->key.k.p; - return ret ?: i->ret; + drop_locks_do(trans, flush_buf(i)); + }))) ?: i->ret; } static const struct file_operations btree_format_debug_ops = { diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index d743da89308e..32bfdf19289a 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -100,20 +100,19 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { .is_visible = dirent_is_visible, }; -int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); int ret = 0; - bkey_fsck_err_on(!d_name.len, c, err, - dirent_empty_name, + bkey_fsck_err_on(!d_name.len, + c, dirent_empty_name, "empty name"); - bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err, - dirent_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), + c, dirent_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); @@ -121,27 +120,27 @@ int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, * Check new keys don't exceed the max length * (older keys may be larger.) */ - bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, c, err, - dirent_name_too_long, + bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, + c, dirent_name_too_long, "dirent name too big (%u > %u)", d_name.len, BCH_NAME_MAX); - bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err, - dirent_name_embedded_nul, + bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), + c, dirent_name_embedded_nul, "dirent has stray data after name's NUL"); bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) || - (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err, - dirent_name_dot_or_dotdot, + (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), + c, dirent_name_dot_or_dotdot, "invalid name"); - bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err, - dirent_name_has_slash, + bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), + c, dirent_name_has_slash, "name with /"); bkey_fsck_err_on(d.v->d_type != DT_SUBVOL && - le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err, - dirent_to_itself, + le64_to_cpu(d.v->d_inum) == d.k->p.inode, + c, dirent_to_itself, "dirent points to own directory"); fsck_err: return ret; diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 24037e6e0a09..8945145865c5 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -7,12 +7,11 @@ enum bch_validate_flags; extern const struct bch_hash_desc bch2_dirent_hash_desc; -int bch2_dirent_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_dirent ((struct bkey_ops) { \ - .key_invalid = bch2_dirent_invalid, \ + .key_validate = bch2_dirent_validate, \ .val_to_text = bch2_dirent_to_text, \ .min_val_size = 16, \ }) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 046ac92b6639..e972e2bca546 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -126,9 +126,8 @@ static inline bool is_zero(char *start, char *end) #define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member)) -int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, k.k->p); @@ -144,18 +143,18 @@ int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, break; case BCH_DISK_ACCOUNTING_replicas: bkey_fsck_err_on(!acc_k.replicas.nr_devs, - c, err, accounting_key_replicas_nr_devs_0, + c, accounting_key_replicas_nr_devs_0, "accounting key replicas entry with nr_devs=0"); bkey_fsck_err_on(acc_k.replicas.nr_required > acc_k.replicas.nr_devs || (acc_k.replicas.nr_required > 1 && acc_k.replicas.nr_required == acc_k.replicas.nr_devs), - c, err, accounting_key_replicas_nr_required_bad, + c, accounting_key_replicas_nr_required_bad, "accounting key replicas entry with bad nr_required"); for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++) - bkey_fsck_err_on(acc_k.replicas.devs[i] > acc_k.replicas.devs[i + 1], - c, err, accounting_key_replicas_devs_unsorted, + bkey_fsck_err_on(acc_k.replicas.devs[i] >= acc_k.replicas.devs[i + 1], + c, accounting_key_replicas_devs_unsorted, "accounting key replicas entry with unsorted devs"); end = (void *) &acc_k.replicas + replicas_entry_bytes(&acc_k.replicas); @@ -178,7 +177,7 @@ int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, } bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)), - c, err, accounting_key_junk_at_end, + c, accounting_key_junk_at_end, "junk at end of accounting key"); fsck_err: return ret; @@ -528,6 +527,9 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, e->pos); + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + continue; + u64 src_v[BCH_ACCOUNTING_MAX_COUNTERS]; u64 dst_v[BCH_ACCOUNTING_MAX_COUNTERS]; @@ -564,7 +566,7 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i; accounting_key_init(&k_i.k, &acc_k, src_v, nr); - bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false); + bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false, false); preempt_disable(); struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); @@ -593,7 +595,7 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) return 0; percpu_down_read(&c->mark_lock); - int ret = __bch2_accounting_mem_mod(c, bkey_s_c_to_accounting(k), false); + int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), false, true); percpu_up_read(&c->mark_lock); if (bch2_accounting_key_is_zero(bkey_s_c_to_accounting(k)) && @@ -760,6 +762,15 @@ void bch2_verify_accounting_clean(struct bch_fs *c) struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k); unsigned nr = bch2_accounting_counters(k.k); + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); + + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + continue; + + if (acc_k.type == BCH_DISK_ACCOUNTING_inum) + continue; + bch2_accounting_mem_read(c, k.k->p, v, nr); if (memcmp(a.v->d, v, nr * sizeof(u64))) { @@ -775,9 +786,6 @@ void bch2_verify_accounting_clean(struct bch_fs *c) mismatch = true; } - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); - switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 3d3f25e08b69..f29fd0dd9581 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -82,14 +82,13 @@ int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *, s64 *, unsigned, bool); int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool); -int bch2_accounting_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_accounting_key_to_text(struct printbuf *, struct disk_accounting_pos *); void bch2_accounting_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_accounting_swab(struct bkey_s); #define bch2_bkey_ops_accounting ((struct bkey_ops) { \ - .key_invalid = bch2_accounting_invalid, \ + .key_validate = bch2_accounting_validate, \ .val_to_text = bch2_accounting_to_text, \ .swab = bch2_accounting_swab, \ .min_val_size = 8, \ @@ -107,41 +106,20 @@ static inline int accounting_pos_cmp(const void *_l, const void *_r) int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, bool); void bch2_accounting_mem_gc(struct bch_fs *); -static inline int __bch2_accounting_mem_mod(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) -{ - struct bch_accounting_mem *acc = &c->accounting; - unsigned idx; - - EBUG_ON(gc && !acc->gc_running); - - while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { - int ret = bch2_accounting_mem_insert(c, a, gc); - if (ret) - return ret; - } - - struct accounting_mem_entry *e = &acc->k.data[idx]; - - EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters); - - for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++) - this_cpu_add(e->v[gc][i], a.v->d[i]); - return 0; -} - /* * Update in memory counters so they match the btree update we're doing; called * from transaction commit path */ -static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) +static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc, bool read) { struct bch_fs *c = trans->c; + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, a.k->p); - if (!gc) { - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); + if (acc_k.type == BCH_DISK_ACCOUNTING_inum) + return 0; + if (!gc && !read) { switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; @@ -162,13 +140,31 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru } } - return __bch2_accounting_mem_mod(c, a, gc); + struct bch_accounting_mem *acc = &c->accounting; + unsigned idx; + + EBUG_ON(gc && !acc->gc_running); + + while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { + int ret = bch2_accounting_mem_insert(c, a, gc); + if (ret) + return ret; + } + + struct accounting_mem_entry *e = &acc->k.data[idx]; + + EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters); + + for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++) + this_cpu_add(e->v[gc][i], a.v->d[i]); + return 0; } static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) { percpu_down_read(&trans->c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, a, gc); + int ret = bch2_accounting_mem_mod_locked(trans, a, gc, false); percpu_up_read(&trans->c->mark_lock); return ret; } diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h index a93cf26ff4a9..7b6e6c97e6aa 100644 --- a/fs/bcachefs/disk_accounting_format.h +++ b/fs/bcachefs/disk_accounting_format.h @@ -103,7 +103,8 @@ static inline bool data_type_is_hidden(enum bch_data_type type) x(compression, 4) \ x(snapshot, 5) \ x(btree, 6) \ - x(rebalance_work, 7) + x(rebalance_work, 7) \ + x(inum, 8) enum disk_accounting_type { #define x(f, nr) BCH_DISK_ACCOUNTING_##f = nr, @@ -136,6 +137,10 @@ struct bch_acct_btree { __u32 id; } __packed; +struct bch_acct_inum { + __u64 inum; +} __packed; + struct bch_acct_rebalance_work { }; @@ -152,6 +157,7 @@ struct disk_accounting_pos { struct bch_acct_snapshot snapshot; struct bch_acct_btree btree; struct bch_acct_rebalance_work rebalance_work; + struct bch_acct_inum inum; } __packed; } __packed; struct bpos _pad; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 84f1cbf6497f..141a4c63142f 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -107,24 +107,23 @@ struct ec_bio { /* Stripes btree keys: */ -int bch2_stripe_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; int ret = 0; bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) || - bpos_gt(k.k->p, POS(0, U32_MAX)), c, err, - stripe_pos_bad, + bpos_gt(k.k->p, POS(0, U32_MAX)), + c, stripe_pos_bad, "stripe at bad pos"); - bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), c, err, - stripe_val_size_bad, + bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), + c, stripe_val_size_bad, "incorrect value size (%zu < %u)", bkey_val_u64s(k.k), stripe_val_u64s(s)); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 84a23eeb6249..90962b3c0130 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -8,8 +8,7 @@ enum bch_validate_flags; -int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, @@ -17,7 +16,7 @@ int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_stripe ((struct bkey_ops) { \ - .key_invalid = bch2_stripe_invalid, \ + .key_validate = bch2_stripe_validate, \ .val_to_text = bch2_stripe_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_stripe, \ diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index a268af3e52bf..ab5a7adece10 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -166,6 +166,7 @@ x(0, journal_reclaim_would_deadlock) \ x(EINVAL, fsck) \ x(BCH_ERR_fsck, fsck_fix) \ + x(BCH_ERR_fsck, fsck_delete_bkey) \ x(BCH_ERR_fsck, fsck_ignore) \ x(BCH_ERR_fsck, fsck_errors_not_fixed) \ x(BCH_ERR_fsck, fsck_repair_unimplemented) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index a62b63108820..95afa7bf2020 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -416,6 +416,28 @@ err: return ret; } +int __bch2_bkey_fsck_err(struct bch_fs *c, + struct bkey_s_c k, + enum bch_fsck_flags flags, + enum bch_sb_error_id err, + const char *fmt, ...) +{ + struct printbuf buf = PRINTBUF; + va_list args; + + prt_str(&buf, "invalid bkey "); + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, "\n "); + va_start(args, fmt); + prt_vprintf(&buf, fmt, args); + va_end(args); + prt_str(&buf, ": delete?"); + + int ret = __bch2_fsck_err(c, NULL, flags, err, "%s", buf.buf); + printbuf_exit(&buf); + return ret; +} + void bch2_flush_fsck_errs(struct bch_fs *c) { struct fsck_err_state *s, *n; diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 995e6bba9bad..2f1b86978f36 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -4,6 +4,7 @@ #include <linux/list.h> #include <linux/printk.h> +#include "bkey_types.h" #include "sb-errors.h" struct bch_dev; @@ -166,24 +167,30 @@ void bch2_flush_fsck_errs(struct bch_fs *); #define fsck_err_on(cond, c, _err_type, ...) \ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -__printf(4, 0) -static inline void bch2_bkey_fsck_err(struct bch_fs *c, - struct printbuf *err_msg, - enum bch_sb_error_id err_type, - const char *fmt, ...) -{ - va_list args; +__printf(5, 6) +int __bch2_bkey_fsck_err(struct bch_fs *, + struct bkey_s_c, + enum bch_fsck_flags, + enum bch_sb_error_id, + const char *, ...); - va_start(args, fmt); - prt_vprintf(err_msg, fmt, args); - va_end(args); -} - -#define bkey_fsck_err(c, _err_msg, _err_type, ...) \ +/* + * for now, bkey fsck errors are always handled by deleting the entire key - + * this will change at some point + */ +#define bkey_fsck_err(c, _err_type, _err_msg, ...) \ do { \ - prt_printf(_err_msg, __VA_ARGS__); \ - bch2_sb_error_count(c, BCH_FSCK_ERR_##_err_type); \ - ret = -BCH_ERR_invalid_bkey; \ + if ((flags & BCH_VALIDATE_silent)) { \ + ret = -BCH_ERR_fsck_delete_bkey; \ + goto fsck_err; \ + } \ + int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX, \ + BCH_FSCK_ERR_##_err_type, \ + _err_msg, ##__VA_ARGS__); \ + if (_ret != -BCH_ERR_fsck_fix && \ + _ret != -BCH_ERR_fsck_ignore) \ + ret = _ret; \ + ret = -BCH_ERR_fsck_delete_bkey; \ goto fsck_err; \ } while (0) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 07973198e35f..4419ad3e454e 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -171,17 +171,16 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err, - btree_ptr_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, + c, btree_ptr_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -192,28 +191,27 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, bch2_bkey_ptrs_to_text(out, c, k); } -int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); int ret = 0; bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, - c, err, btree_ptr_v2_val_too_big, + c, btree_ptr_v2_val_too_big, "value too big (%zu > %zu)", bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p), - c, err, btree_ptr_v2_min_key_bad, + c, btree_ptr_v2_min_key_bad, "min_key > key"); if (flags & BCH_VALIDATE_write) bkey_fsck_err_on(!bp.v->sectors_written, - c, err, btree_ptr_v2_written_0, + c, btree_ptr_v2_written_0, "sectors_written == 0"); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -399,15 +397,14 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reservation_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); int ret = 0; - bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err, - reservation_key_nr_replicas_invalid, + bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, + c, reservation_key_nr_replicas_invalid, "invalid nr_replicas (%u)", r.v->nr_replicas); fsck_err: return ret; @@ -1102,14 +1099,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, } } - -static int extent_ptr_invalid(struct bch_fs *c, - struct bkey_s_c k, - enum bch_validate_flags flags, - const struct bch_extent_ptr *ptr, - unsigned size_ondisk, - bool metadata, - struct printbuf *err) +static int extent_ptr_validate(struct bch_fs *c, + struct bkey_s_c k, + enum bch_validate_flags flags, + const struct bch_extent_ptr *ptr, + unsigned size_ondisk, + bool metadata) { int ret = 0; @@ -1128,28 +1123,27 @@ static int extent_ptr_invalid(struct bch_fs *c, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr(ptrs, ptr2) - bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err, - ptr_to_duplicate_device, + bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, + c, ptr_to_duplicate_device, "multiple pointers to same device (%u)", ptr->dev); - bkey_fsck_err_on(bucket >= nbuckets, c, err, - ptr_after_last_bucket, + bkey_fsck_err_on(bucket >= nbuckets, + c, ptr_after_last_bucket, "pointer past last bucket (%llu > %llu)", bucket, nbuckets); - bkey_fsck_err_on(bucket < first_bucket, c, err, - ptr_before_first_bucket, + bkey_fsck_err_on(bucket < first_bucket, + c, ptr_before_first_bucket, "pointer before first bucket (%llu < %u)", bucket, first_bucket); - bkey_fsck_err_on(bucket_offset + size_ondisk > bucket_size, c, err, - ptr_spans_multiple_buckets, + bkey_fsck_err_on(bucket_offset + size_ondisk > bucket_size, + c, ptr_spans_multiple_buckets, "pointer spans multiple buckets (%u + %u > %u)", bucket_offset, size_ondisk, bucket_size); fsck_err: return ret; } -int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1164,25 +1158,24 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, size_ondisk = btree_sectors(c); bkey_extent_entry_for_each(ptrs, entry) { - bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err, - extent_ptrs_invalid_entry, - "invalid extent entry type (got %u, max %u)", - __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); + bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, + c, extent_ptrs_invalid_entry, + "invalid extent entry type (got %u, max %u)", + __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); bkey_fsck_err_on(bkey_is_btree_ptr(k.k) && - !extent_entry_is_ptr(entry), c, err, - btree_ptr_has_non_ptr, + !extent_entry_is_ptr(entry), + c, btree_ptr_has_non_ptr, "has non ptr field"); switch (extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: - ret = extent_ptr_invalid(c, k, flags, &entry->ptr, - size_ondisk, false, err); + ret = extent_ptr_validate(c, k, flags, &entry->ptr, size_ondisk, false); if (ret) return ret; - bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err, - ptr_cached_and_erasure_coded, + bkey_fsck_err_on(entry->ptr.cached && have_ec, + c, ptr_cached_and_erasure_coded, "cached, erasure coded ptr"); if (!entry->ptr.unwritten) @@ -1199,44 +1192,50 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT_ENTRY_crc128: crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err, - ptr_crc_uncompressed_size_too_small, + bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, + c, ptr_crc_uncompressed_size_too_small, "checksum offset + key size > uncompressed size"); - bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err, - ptr_crc_csum_type_unknown, + bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), + c, ptr_crc_csum_type_unknown, "invalid checksum type"); - bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err, - ptr_crc_compression_type_unknown, + bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, + c, ptr_crc_compression_type_unknown, "invalid compression type"); if (bch2_csum_type_is_encryption(crc.csum_type)) { if (nonce == UINT_MAX) nonce = crc.offset + crc.nonce; else if (nonce != crc.offset + crc.nonce) - bkey_fsck_err(c, err, ptr_crc_nonce_mismatch, + bkey_fsck_err(c, ptr_crc_nonce_mismatch, "incorrect nonce"); } - bkey_fsck_err_on(crc_since_last_ptr, c, err, - ptr_crc_redundant, + bkey_fsck_err_on(crc_since_last_ptr, + c, ptr_crc_redundant, "redundant crc entry"); crc_since_last_ptr = true; bkey_fsck_err_on(crc_is_encoded(crc) && (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && - (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), c, err, - ptr_crc_uncompressed_size_too_big, + (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), + c, ptr_crc_uncompressed_size_too_big, "too large encoded extent"); size_ondisk = crc.compressed_size; break; case BCH_EXTENT_ENTRY_stripe_ptr: - bkey_fsck_err_on(have_ec, c, err, - ptr_stripe_redundant, + bkey_fsck_err_on(have_ec, + c, ptr_stripe_redundant, "redundant stripe entry"); have_ec = true; break; case BCH_EXTENT_ENTRY_rebalance: { + /* + * this shouldn't be a fsck error, for forward + * compatibility; the rebalance code should just refetch + * the compression opt if it's unknown + */ +#if 0 const struct bch_extent_rebalance *r = &entry->rebalance; if (!bch2_compression_opt_valid(r->compression)) { @@ -1245,28 +1244,29 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, opt.type, opt.level); return -BCH_ERR_invalid_bkey; } +#endif break; } } } - bkey_fsck_err_on(!nr_ptrs, c, err, - extent_ptrs_no_ptrs, + bkey_fsck_err_on(!nr_ptrs, + c, extent_ptrs_no_ptrs, "no ptrs"); - bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err, - extent_ptrs_too_many_ptrs, + bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, + c, extent_ptrs_too_many_ptrs, "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX); - bkey_fsck_err_on(have_written && have_unwritten, c, err, - extent_ptrs_written_and_unwritten, + bkey_fsck_err_on(have_written && have_unwritten, + c, extent_ptrs_written_and_unwritten, "extent with unwritten and written ptrs"); - bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err, - extent_ptrs_unwritten, + bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, + c, extent_ptrs_unwritten, "has unwritten ptrs"); - bkey_fsck_err_on(crc_since_last_ptr, c, err, - extent_ptrs_redundant_crc, + bkey_fsck_err_on(crc_since_last_ptr, + c, extent_ptrs_redundant_crc, "redundant crc entry"); - bkey_fsck_err_on(have_ec, c, err, - extent_ptrs_redundant_stripe, + bkey_fsck_err_on(have_ec, + c, extent_ptrs_redundant_stripe, "redundant stripe entry"); fsck_err: return ret; diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index facdb8a86eec..1a6ddee48041 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -409,26 +409,26 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_btree_ptr_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_btree_ptr_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_btree_ptr_v2_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, int, struct bkey_s); #define bch2_bkey_ops_btree_ptr ((struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_invalid, \ + .key_validate = bch2_btree_ptr_validate, \ .val_to_text = bch2_btree_ptr_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_extent, \ }) #define bch2_bkey_ops_btree_ptr_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_v2_invalid, \ + .key_validate = bch2_btree_ptr_v2_validate, \ .val_to_text = bch2_btree_ptr_v2_to_text, \ .swab = bch2_ptr_swab, \ .compat = bch2_btree_ptr_v2_compat, \ @@ -441,7 +441,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_extent ((struct bkey_ops) { \ - .key_invalid = bch2_bkey_ptrs_invalid, \ + .key_validate = bch2_bkey_ptrs_validate, \ .val_to_text = bch2_bkey_ptrs_to_text, \ .swab = bch2_ptr_swab, \ .key_normalize = bch2_extent_normalize, \ @@ -451,13 +451,13 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_reservation_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_reservation ((struct bkey_ops) { \ - .key_invalid = bch2_reservation_invalid, \ + .key_validate = bch2_reservation_validate, \ .val_to_text = bch2_reservation_to_text, \ .key_merge = bch2_reservation_merge, \ .trigger = bch2_trigger_reservation, \ @@ -683,8 +683,8 @@ bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *); void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_bkey_ptrs_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_ptr_swab(struct bkey_s); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 15fc41e63b6c..94c392abef65 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -193,7 +193,7 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino * only insert fully created inodes in the inode hash table. But * discard_new_inode() expects it to be set... */ - inode->v.i_flags |= I_NEW; + inode->v.i_state |= I_NEW; /* * We don't want bch2_evict_inode() to delete the inode on disk, * we just raced and had another inode in cache. Normally new diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 1e20020eadd1..2be6be33afa3 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -434,100 +434,98 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) return &inode_p->inode.k_i; } -static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) +static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bch_inode_unpacked unpacked; int ret = 0; - bkey_fsck_err_on(k.k->p.inode, c, err, - inode_pos_inode_nonzero, + bkey_fsck_err_on(k.k->p.inode, + c, inode_pos_inode_nonzero, "nonzero k.p.inode"); - bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err, - inode_pos_blockdev_range, + bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, + c, inode_pos_blockdev_range, "fs inode in blockdev range"); - bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err, - inode_unpack_error, + bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), + c, inode_unpack_error, "invalid variable length fields"); - bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err, - inode_checksum_type_invalid, + bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, + c, inode_checksum_type_invalid, "invalid data checksum type (%u >= %u", unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); bkey_fsck_err_on(unpacked.bi_compression && - !bch2_compression_opt_valid(unpacked.bi_compression - 1), c, err, - inode_compression_type_invalid, + !bch2_compression_opt_valid(unpacked.bi_compression - 1), + c, inode_compression_type_invalid, "invalid compression opt %u", unpacked.bi_compression - 1); bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) && - unpacked.bi_nlink != 0, c, err, - inode_unlinked_but_nlink_nonzero, + unpacked.bi_nlink != 0, + c, inode_unlinked_but_nlink_nonzero, "flagged as unlinked but bi_nlink != 0"); - bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err, - inode_subvol_root_but_not_dir, + bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), + c, inode_subvol_root_but_not_dir, "subvolume root but not a directory"); fsck_err: return ret; } -int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); int ret = 0; - bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } -int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); int ret = 0; - bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } -int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); int ret = 0; bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || - INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err, - inode_v3_fields_start_bad, + INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), + c, inode_v3_fields_start_bad, "invalid fields_start (got %llu, min %u max %zu)", INODEv3_FIELDS_START(inode.v), INODEv3_FIELDS_START_INITIAL, bkey_val_u64s(inode.k)); - bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } @@ -625,14 +623,13 @@ int bch2_trigger_inode(struct btree_trans *trans, return 0; } -int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_generation_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(k.k->p.inode, c, err, - inode_pos_inode_nonzero, + bkey_fsck_err_on(k.k->p.inode, + c, inode_pos_inode_nonzero, "nonzero k.p.inode"); fsck_err: return ret; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index da0e4a745099..f1fcb4c58039 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -9,12 +9,12 @@ enum bch_validate_flags; extern const char * const bch2_inode_opts[]; -int bch2_inode_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_inode_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_inode_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); +int bch2_inode_v2_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); +int bch2_inode_v3_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, @@ -22,21 +22,21 @@ int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_inode ((struct bkey_ops) { \ - .key_invalid = bch2_inode_invalid, \ + .key_validate = bch2_inode_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_inode_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_inode_v2_invalid, \ + .key_validate = bch2_inode_v2_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 32, \ }) #define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \ - .key_invalid = bch2_inode_v3_invalid, \ + .key_validate = bch2_inode_v3_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 48, \ @@ -49,12 +49,12 @@ static inline bool bkey_is_inode(const struct bkey *k) k->type == KEY_TYPE_inode_v3; } -int bch2_inode_generation_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_inode_generation_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_inode_generation ((struct bkey_ops) { \ - .key_invalid = bch2_inode_generation_invalid, \ + .key_validate = bch2_inode_generation_validate, \ .val_to_text = bch2_inode_generation_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 7a833a3f1c63..7664b68e6a15 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -332,7 +332,6 @@ static int journal_validate_key(struct bch_fs *c, { int write = flags & BCH_VALIDATE_write; void *next = vstruct_next(entry); - struct printbuf buf = PRINTBUF; int ret = 0; if (journal_entry_err_on(!k->k.u64s, @@ -368,34 +367,21 @@ static int journal_validate_key(struct bch_fs *c, bch2_bkey_compat(level, btree_id, version, big_endian, write, NULL, bkey_to_packed(k)); - if (bch2_bkey_invalid(c, bkey_i_to_s_c(k), - __btree_node_type(level, btree_id), write, &buf)) { - printbuf_reset(&buf); - journal_entry_err_msg(&buf, version, jset, entry); - prt_newline(&buf); - printbuf_indent_add(&buf, 2); - - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); - prt_newline(&buf); - bch2_bkey_invalid(c, bkey_i_to_s_c(k), - __btree_node_type(level, btree_id), write, &buf); - - mustfix_fsck_err(c, journal_entry_bkey_invalid, - "%s", buf.buf); - + ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), + __btree_node_type(level, btree_id), write); + if (ret == -BCH_ERR_fsck_delete_bkey) { le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); journal_entry_null_range(vstruct_next(entry), next); - - printbuf_exit(&buf); return FSCK_DELETED_KEY; } + if (ret) + goto fsck_err; if (write) bch2_bkey_compat(level, btree_id, version, big_endian, write, NULL, bkey_to_packed(k)); fsck_err: - printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 83b1586cb371..96f2f4f8c397 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -10,14 +10,13 @@ #include "recovery.h" /* KEY_TYPE_lru is obsolete: */ -int bch2_lru_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_lru_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(!lru_pos_time(k.k->p), c, err, - lru_entry_at_time_0, + bkey_fsck_err_on(!lru_pos_time(k.k->p), + c, lru_entry_at_time_0, "lru entry at time=0"); fsck_err: return ret; diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index 5bd8974a7f11..e6a7d8241bb8 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -33,14 +33,13 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l) return BCH_LRU_read; } -int bch2_lru_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_lru_pos_to_text(struct printbuf *, struct bpos); #define bch2_bkey_ops_lru ((struct bkey_ops) { \ - .key_invalid = bch2_lru_invalid, \ + .key_validate = bch2_lru_validate, \ .val_to_text = bch2_lru_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index a0cca8b70e0a..c32a05e252e2 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -59,13 +59,13 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { .to_text = bch2_sb_quota_to_text, }; -int bch2_quota_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_quota_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, c, err, - quota_type_invalid, + bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, + c, quota_type_invalid, "invalid quota type (%llu >= %u)", k.k->p.inode, QTYP_NR); fsck_err: diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h index 02d37a332218..a62abcc5332a 100644 --- a/fs/bcachefs/quota.h +++ b/fs/bcachefs/quota.h @@ -8,12 +8,11 @@ enum bch_validate_flags; extern const struct bch_sb_field_ops bch_sb_field_ops_quota; -int bch2_quota_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_quota_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_quota ((struct bkey_ops) { \ - .key_invalid = bch2_quota_invalid, \ + .key_validate = bch2_quota_validate, \ .val_to_text = bch2_quota_to_text, \ .min_val_size = 32, \ }) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 5f92715e1525..e59c0abb4772 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -29,15 +29,14 @@ static inline unsigned bkey_type_to_indirect(const struct bkey *k) /* reflink pointers */ -int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reflink_p_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); int ret = 0; bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad), - c, err, reflink_p_front_pad_bad, + c, reflink_p_front_pad_bad, "idx < front_pad (%llu < %u)", le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); fsck_err: @@ -256,11 +255,10 @@ int bch2_trigger_reflink_p(struct btree_trans *trans, /* indirect extents */ -int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { - return bch2_bkey_ptrs_invalid(c, k, flags, err); + return bch2_bkey_ptrs_validate(c, k, flags); } void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, @@ -311,9 +309,8 @@ int bch2_trigger_reflink_v(struct btree_trans *trans, /* indirect inline data */ -int bch2_indirect_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index e894f3a2c67a..51afe11d8ed6 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -4,41 +4,37 @@ enum bch_validate_flags; -int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); +int bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \ - .key_invalid = bch2_reflink_p_invalid, \ + .key_validate = bch2_reflink_p_validate, \ .val_to_text = bch2_reflink_p_to_text, \ .key_merge = bch2_reflink_p_merge, \ .trigger = bch2_trigger_reflink_p, \ .min_val_size = 16, \ }) -int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); +int bch2_reflink_v_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \ - .key_invalid = bch2_reflink_v_invalid, \ + .key_validate = bch2_reflink_v_validate, \ .val_to_text = bch2_reflink_v_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_reflink_v, \ .min_val_size = 8, \ }) -int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_indirect_inline_data_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_indirect_inline_data_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_indirect_inline_data(struct btree_trans *, @@ -47,7 +43,7 @@ int bch2_trigger_indirect_inline_data(struct btree_trans *, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \ - .key_invalid = bch2_indirect_inline_data_invalid, \ + .key_validate = bch2_indirect_inline_data_validate, \ .val_to_text = bch2_indirect_inline_data_to_text, \ .trigger = bch2_trigger_indirect_inline_data, \ .min_val_size = 8, \ diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 6c4469f53313..650a1f77ca40 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -72,7 +72,10 @@ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad, \ BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ - BCH_FSCK_ERR_accounting_key_junk_at_end) + BCH_FSCK_ERR_accounting_key_junk_at_end) \ + x(disk_accounting_inum, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_accounting_mismatch) #define DOWNGRADE_TABLE() \ x(bucket_stripe_sectors, \ @@ -104,6 +107,7 @@ BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ BCH_FSCK_ERR_fs_usage_replicas_wrong, \ + BCH_FSCK_ERR_accounting_replicas_not_marked, \ BCH_FSCK_ERR_bkey_version_in_future) struct upgrade_downgrade_entry { diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 96744b1a76f5..8b18a9b483a4 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -31,15 +31,14 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(t.v->root_snapshot)); } -int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1)), c, err, - snapshot_tree_pos_bad, + bkey_lt(k.k->p, POS(0, 1)), + c, snapshot_tree_pos_bad, "bad pos"); fsck_err: return ret; @@ -225,55 +224,54 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(s.v->skip[2])); } -int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_snapshot s; u32 i, id; int ret = 0; bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1)), c, err, - snapshot_pos_bad, + bkey_lt(k.k->p, POS(0, 1)), + c, snapshot_pos_bad, "bad pos"); s = bkey_s_c_to_snapshot(k); id = le32_to_cpu(s.v->parent); - bkey_fsck_err_on(id && id <= k.k->p.offset, c, err, - snapshot_parent_bad, + bkey_fsck_err_on(id && id <= k.k->p.offset, + c, snapshot_parent_bad, "bad parent node (%u <= %llu)", id, k.k->p.offset); - bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err, - snapshot_children_not_normalized, + bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), + c, snapshot_children_not_normalized, "children not normalized"); - bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err, - snapshot_child_duplicate, + bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], + c, snapshot_child_duplicate, "duplicate child nodes"); for (i = 0; i < 2; i++) { id = le32_to_cpu(s.v->children[i]); - bkey_fsck_err_on(id >= k.k->p.offset, c, err, - snapshot_child_bad, + bkey_fsck_err_on(id >= k.k->p.offset, + c, snapshot_child_bad, "bad child node (%u >= %llu)", id, k.k->p.offset); } if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || - le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err, - snapshot_skiplist_not_normalized, + le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), + c, snapshot_skiplist_not_normalized, "skiplist not normalized"); for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { id = le32_to_cpu(s.v->skip[i]); - bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err, - snapshot_skiplist_bad, + bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), + c, snapshot_skiplist_bad, "bad skiplist node %u", id); } } diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 31b0ee03e962..eb5ef64221d6 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -5,11 +5,11 @@ enum bch_validate_flags; void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_tree_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_snapshot_tree_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); #define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_tree_invalid, \ + .key_validate = bch2_snapshot_tree_validate, \ .val_to_text = bch2_snapshot_tree_to_text, \ .min_val_size = 8, \ }) @@ -19,14 +19,13 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_snapshot ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_invalid, \ + .key_validate = bch2_snapshot_validate, \ .val_to_text = bch2_snapshot_to_text, \ .trigger = bch2_mark_snapshot, \ .min_val_size = 24, \ diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index f56720b55862..dbe834cb349f 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -207,23 +207,23 @@ int bch2_check_subvol_children(struct bch_fs *c) /* Subvolumes: */ -int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_subvolume_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k); int ret = 0; bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || - bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, - subvol_pos_bad, + bkey_gt(k.k->p, SUBVOL_POS_MAX), + c, subvol_pos_bad, "invalid pos"); - bkey_fsck_err_on(!subvol.v->snapshot, c, err, - subvol_snapshot_bad, + bkey_fsck_err_on(!subvol.v->snapshot, + c, subvol_snapshot_bad, "invalid snapshot"); - bkey_fsck_err_on(!subvol.v->inode, c, err, - subvol_inode_bad, + bkey_fsck_err_on(!subvol.v->inode, + c, subvol_inode_bad, "invalid inode"); fsck_err: return ret; diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index afa5e871efb2..a8299ba2cab2 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -10,15 +10,14 @@ enum bch_validate_flags; int bch2_check_subvols(struct bch_fs *); int bch2_check_subvol_children(struct bch_fs *); -int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_subvolume ((struct bkey_ops) { \ - .key_invalid = bch2_subvolume_invalid, \ + .key_validate = bch2_subvolume_validate, \ .val_to_text = bch2_subvolume_to_text, \ .trigger = bch2_subvolume_trigger, \ .min_val_size = 16, \ diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c index dc48b52b01b4..dfad1d06633d 100644 --- a/fs/bcachefs/trace.c +++ b/fs/bcachefs/trace.c @@ -4,6 +4,7 @@ #include "buckets.h" #include "btree_cache.h" #include "btree_iter.h" +#include "btree_key_cache.h" #include "btree_locking.h" #include "btree_update_interior.h" #include "keylist.h" diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index d0e6b9deb6cb..c62f00322d1e 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -988,10 +988,33 @@ TRACE_EVENT(trans_restart_split_race, __entry->u64s_remaining) ); -DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim, +TRACE_EVENT(trans_blocked_journal_reclaim, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans, caller_ip) + TP_ARGS(trans, caller_ip), + + TP_STRUCT__entry( + __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + + __field(unsigned long, key_cache_nr_keys ) + __field(unsigned long, key_cache_nr_dirty ) + __field(long, must_wait ) + ), + + TP_fast_assign( + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + __entry->key_cache_nr_keys = atomic_long_read(&trans->c->btree_key_cache.nr_keys); + __entry->key_cache_nr_dirty = atomic_long_read(&trans->c->btree_key_cache.nr_dirty); + __entry->must_wait = __bch2_btree_key_cache_must_wait(trans->c); + ), + + TP_printk("%s %pS key cache keys %lu dirty %lu must_wait %li", + __entry->trans_fn, (void *) __entry->caller_ip, + __entry->key_cache_nr_keys, + __entry->key_cache_nr_dirty, + __entry->must_wait) ); TRACE_EVENT(trans_restart_journal_preres_get, diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index c11bf6dacc2c..f2b4c17a0307 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -70,17 +70,16 @@ const struct bch_hash_desc bch2_xattr_hash_desc = { .cmp_bkey = xattr_cmp_bkey, }; -int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_xattr_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len)); int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, c, err, - xattr_val_size_too_small, + bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, + c, xattr_val_size_too_small, "value too small (%zu < %u)", bkey_val_u64s(k.k), val_u64s); @@ -88,17 +87,17 @@ int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, val_u64s = xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len) + 4); - bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, c, err, - xattr_val_size_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, + c, xattr_val_size_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), val_u64s); - bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), c, err, - xattr_invalid_type, + bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), + c, xattr_invalid_type, "invalid type (%u)", xattr.v->x_type); - bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), c, err, - xattr_name_invalid_chars, + bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), + c, xattr_name_invalid_chars, "xattr name has invalid characters"); fsck_err: return ret; diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index 1574b9eb4c85..c188a5ad64ce 100644 --- a/fs/bcachefs/xattr.h +++ b/fs/bcachefs/xattr.h @@ -6,12 +6,11 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc; -int bch2_xattr_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_xattr ((struct bkey_ops) { \ - .key_invalid = bch2_xattr_invalid, \ + .key_validate = bch2_xattr_validate, \ .val_to_text = bch2_xattr_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f5996a43db24..eaa1dbd31352 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2697,15 +2697,16 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, u64 offset = bytenr - block_group->start; u64 to_free, to_unusable; int bg_reclaim_threshold = 0; - bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0)); + bool initial; u64 reclaimable_unusable; - WARN_ON(!initial && offset + size > block_group->zone_capacity); + spin_lock(&block_group->lock); + initial = ((size == block_group->length) && (block_group->alloc_offset == 0)); + WARN_ON(!initial && offset + size > block_group->zone_capacity); if (!initial) bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold); - spin_lock(&ctl->tree_lock); if (!used) to_free = size; else if (initial) @@ -2718,7 +2719,9 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, to_free = offset + size - block_group->alloc_offset; to_unusable = size - to_free; + spin_lock(&ctl->tree_lock); ctl->free_space += to_free; + spin_unlock(&ctl->tree_lock); /* * If the block group is read-only, we should account freed space into * bytes_readonly. @@ -2727,11 +2730,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, block_group->zone_unusable += to_unusable; WARN_ON(block_group->zone_unusable > block_group->length); } - spin_unlock(&ctl->tree_lock); if (!used) { - spin_lock(&block_group->lock); block_group->alloc_offset -= size; - spin_unlock(&block_group->lock); } reclaimable_unusable = block_group->zone_unusable - @@ -2745,6 +2745,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, btrfs_mark_bg_to_reclaim(block_group); } + spin_unlock(&block_group->lock); + return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 333b0e8587a2..b1b6564ab68f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4195,6 +4195,7 @@ err: btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2); inode_inc_iversion(&inode->vfs_inode); + inode_set_ctime_current(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); inode_set_mtime_to_ts(&dir->vfs_inode, inode_set_ctime_current(&dir->vfs_inode)); ret = btrfs_update_inode(trans, dir); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 7fc692fc76e1..619fa0b8b3f6 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -347,7 +347,7 @@ struct name_cache_entry { int ret; int need_later_update; int name_len; - char name[]; + char name[] __counted_by(name_len); }; /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 11044e9e2cb1..98fa0f382480 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2402,7 +2402,13 @@ static long btrfs_nr_cached_objects(struct super_block *sb, struct shrink_contro trace_btrfs_extent_map_shrinker_count(fs_info, nr); - return nr; + /* + * Only report the real number for DEBUG builds, as there are reports of + * serious performance degradation caused by too frequent shrinks. + */ + if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) + return nr; + return 0; } static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_control *sc) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 6f1e2f2215d9..634d69964fe4 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1764,6 +1764,72 @@ static int check_raid_stripe_extent(const struct extent_buffer *leaf, return 0; } +static int check_dev_extent_item(const struct extent_buffer *leaf, + const struct btrfs_key *key, + int slot, + struct btrfs_key *prev_key) +{ + struct btrfs_dev_extent *de; + const u32 sectorsize = leaf->fs_info->sectorsize; + + de = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); + /* Basic fixed member checks. */ + if (unlikely(btrfs_dev_extent_chunk_tree(leaf, de) != + BTRFS_CHUNK_TREE_OBJECTID)) { + generic_err(leaf, slot, + "invalid dev extent chunk tree id, has %llu expect %llu", + btrfs_dev_extent_chunk_tree(leaf, de), + BTRFS_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + if (unlikely(btrfs_dev_extent_chunk_objectid(leaf, de) != + BTRFS_FIRST_CHUNK_TREE_OBJECTID)) { + generic_err(leaf, slot, + "invalid dev extent chunk objectid, has %llu expect %llu", + btrfs_dev_extent_chunk_objectid(leaf, de), + BTRFS_FIRST_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + /* Alignment check. */ + if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent key.offset, has %llu not aligned to %u", + key->offset, sectorsize); + return -EUCLEAN; + } + if (unlikely(!IS_ALIGNED(btrfs_dev_extent_chunk_offset(leaf, de), + sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent chunk offset, has %llu not aligned to %u", + btrfs_dev_extent_chunk_objectid(leaf, de), + sectorsize); + return -EUCLEAN; + } + if (unlikely(!IS_ALIGNED(btrfs_dev_extent_length(leaf, de), + sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent length, has %llu not aligned to %u", + btrfs_dev_extent_length(leaf, de), sectorsize); + return -EUCLEAN; + } + /* Overlap check with previous dev extent. */ + if (slot && prev_key->objectid == key->objectid && + prev_key->type == key->type) { + struct btrfs_dev_extent *prev_de; + u64 prev_len; + + prev_de = btrfs_item_ptr(leaf, slot - 1, struct btrfs_dev_extent); + prev_len = btrfs_dev_extent_length(leaf, prev_de); + if (unlikely(prev_key->offset + prev_len > key->offset)) { + generic_err(leaf, slot, + "dev extent overlap, prev offset %llu len %llu current offset %llu", + prev_key->objectid, prev_len, key->offset); + return -EUCLEAN; + } + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -1800,6 +1866,9 @@ static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf, case BTRFS_DEV_ITEM_KEY: ret = check_dev_item(leaf, key, slot); break; + case BTRFS_DEV_EXTENT_KEY: + ret = check_dev_extent_item(leaf, key, slot, prev_key); + break; case BTRFS_INODE_ITEM_KEY: ret = check_inode_item(leaf, key, slot); break; diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 2193a6710c8f..c3b90abdee37 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -8,19 +8,15 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, void *dentry_blk, struct erofs_dirent *de, - unsigned int nameoff, unsigned int maxsize) + unsigned int nameoff0, unsigned int maxsize) { - const struct erofs_dirent *end = dentry_blk + nameoff; + const struct erofs_dirent *end = dentry_blk + nameoff0; while (de < end) { - const char *de_name; + unsigned char d_type = fs_ftype_to_dtype(de->file_type); + unsigned int nameoff = le16_to_cpu(de->nameoff); + const char *de_name = (char *)dentry_blk + nameoff; unsigned int de_namelen; - unsigned char d_type; - - d_type = fs_ftype_to_dtype(de->file_type); - - nameoff = le16_to_cpu(de->nameoff); - de_name = (char *)dentry_blk + nameoff; /* the last dirent in the block? */ if (de + 1 >= end) @@ -52,21 +48,20 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct super_block *sb = dir->i_sb; unsigned long bsz = sb->s_blocksize; - const size_t dirsize = i_size_read(dir); - unsigned int i = erofs_blknr(sb, ctx->pos); unsigned int ofs = erofs_blkoff(sb, ctx->pos); int err = 0; bool initial = true; buf.mapping = dir->i_mapping; - while (ctx->pos < dirsize) { + while (ctx->pos < dir->i_size) { + erofs_off_t dbstart = ctx->pos - ofs; struct erofs_dirent *de; unsigned int nameoff, maxsize; - de = erofs_bread(&buf, erofs_pos(sb, i), EROFS_KMAP); + de = erofs_bread(&buf, dbstart, EROFS_KMAP); if (IS_ERR(de)) { erofs_err(sb, "fail to readdir of logical block %u of nid %llu", - i, EROFS_I(dir)->nid); + erofs_blknr(sb, dbstart), EROFS_I(dir)->nid); err = PTR_ERR(de); break; } @@ -79,25 +74,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) break; } - maxsize = min_t(unsigned int, dirsize - ctx->pos + ofs, bsz); - + maxsize = min_t(unsigned int, dir->i_size - dbstart, bsz); /* search dirents at the arbitrary position */ if (initial) { initial = false; - ofs = roundup(ofs, sizeof(struct erofs_dirent)); - ctx->pos = erofs_pos(sb, i) + ofs; - if (ofs >= nameoff) - goto skip_this; + ctx->pos = dbstart + ofs; } err = erofs_fill_dentries(dir, ctx, de, (void *)de + ofs, nameoff, maxsize); if (err) break; -skip_this: - ctx->pos = erofs_pos(sb, i) + maxsize; - ++i; + ctx->pos = dbstart + maxsize; ofs = 0; } erofs_put_metabuf(&buf); diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 43c09aae2afc..419432be3223 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -257,25 +257,23 @@ static int erofs_fill_inode(struct inode *inode) goto out_unlock; } + mapping_set_large_folios(inode->i_mapping); if (erofs_inode_is_data_compressed(vi->datalayout)) { #ifdef CONFIG_EROFS_FS_ZIP DO_ONCE_LITE_IF(inode->i_blkbits != PAGE_SHIFT, erofs_info, inode->i_sb, "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!"); inode->i_mapping->a_ops = &z_erofs_aops; - err = 0; - goto out_unlock; -#endif +#else err = -EOPNOTSUPP; - goto out_unlock; - } - inode->i_mapping->a_ops = &erofs_raw_access_aops; - mapping_set_large_folios(inode->i_mapping); +#endif + } else { + inode->i_mapping->a_ops = &erofs_raw_access_aops; #ifdef CONFIG_EROFS_FS_ONDEMAND - if (erofs_is_fscache_mode(inode->i_sb)) - inode->i_mapping->a_ops = &erofs_fscache_access_aops; + if (erofs_is_fscache_mode(inode->i_sb)) + inode->i_mapping->a_ops = &erofs_fscache_access_aops; #endif - + } out_unlock: erofs_put_metabuf(&buf); return err; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 736607675396..45dc15ebd870 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -220,7 +220,7 @@ struct erofs_buf { }; #define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL }) -#define erofs_blknr(sb, addr) ((addr) >> (sb)->s_blocksize_bits) +#define erofs_blknr(sb, addr) ((erofs_blk_t)((addr) >> (sb)->s_blocksize_bits)) #define erofs_blkoff(sb, addr) ((addr) & ((sb)->s_blocksize - 1)) #define erofs_pos(sb, blk) ((erofs_off_t)(blk) << (sb)->s_blocksize_bits) #define erofs_iblks(i) (round_up((i)->i_size, i_blocksize(i)) >> (i)->i_blkbits) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 32ce5b35e1df..6cb5c8916174 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -108,22 +108,6 @@ static void erofs_free_inode(struct inode *inode) kmem_cache_free(erofs_inode_cachep, vi); } -static bool check_layout_compatibility(struct super_block *sb, - struct erofs_super_block *dsb) -{ - const unsigned int feature = le32_to_cpu(dsb->feature_incompat); - - EROFS_SB(sb)->feature_incompat = feature; - - /* check if current kernel meets all mandatory requirements */ - if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) { - erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel", - feature & ~EROFS_ALL_FEATURE_INCOMPAT); - return false; - } - return true; -} - /* read variable-sized metadata, offset will be aligned by 4-byte */ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, erofs_off_t *offset, int *lengthp) @@ -279,7 +263,7 @@ static int erofs_scan_devices(struct super_block *sb, static int erofs_read_superblock(struct super_block *sb) { - struct erofs_sb_info *sbi; + struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_super_block *dsb; void *data; @@ -291,9 +275,7 @@ static int erofs_read_superblock(struct super_block *sb) return PTR_ERR(data); } - sbi = EROFS_SB(sb); dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); - ret = -EINVAL; if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) { erofs_err(sb, "cannot find valid erofs superblock"); @@ -318,8 +300,12 @@ static int erofs_read_superblock(struct super_block *sb) } ret = -EINVAL; - if (!check_layout_compatibility(sb, dsb)) + sbi->feature_incompat = le32_to_cpu(dsb->feature_incompat); + if (sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT) { + erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel", + sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT); goto out; + } sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE; if (sbi->sb_size > PAGE_SIZE - EROFS_SUPER_OFFSET) { diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c index 9b53883e5caf..37afe2024840 100644 --- a/fs/erofs/zutil.c +++ b/fs/erofs/zutil.c @@ -111,7 +111,8 @@ int z_erofs_gbuf_growsize(unsigned int nrpages) out: if (i < z_erofs_gbuf_count && tmp_pages) { for (j = 0; j < nrpages; ++j) - if (tmp_pages[j] && tmp_pages[j] != gbuf->pages[j]) + if (tmp_pages[j] && (j >= gbuf->nrpages || + tmp_pages[j] != gbuf->pages[j])) __free_page(tmp_pages[j]); kfree(tmp_pages); } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9eb191b5c4de..7146038b2fe7 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1618,9 +1618,11 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, this_num = min_t(unsigned, num, PAGE_SIZE - offset); err = fuse_copy_page(cs, &page, offset, this_num, 0); - if (!err && offset == 0 && - (this_num == PAGE_SIZE || file_size == end)) + if (!PageUptodate(page) && !err && offset == 0 && + (this_num == PAGE_SIZE || file_size == end)) { + zero_user_segment(page, this_num, PAGE_SIZE); SetPageUptodate(page); + } unlock_page(page); put_page(page); diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 3f3842e7b44a..1fc66bcf49eb 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -316,7 +316,7 @@ static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) #endif } - if (rdata->credits.value != 0) + if (rdata->credits.value != 0) { trace_smb3_rw_credits(rdata->rreq->debug_id, rdata->subreq.debug_index, rdata->credits.value, @@ -324,8 +324,12 @@ static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) rdata->server ? rdata->server->in_flight : 0, -rdata->credits.value, cifs_trace_rw_credits_free_subreq); + if (rdata->server) + add_credits_and_wake_if(rdata->server, &rdata->credits, 0); + else + rdata->credits.value = 0; + } - add_credits_and_wake_if(rdata->server, &rdata->credits, 0); if (rdata->have_xid) free_xid(rdata->xid); } @@ -2750,6 +2754,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file->f_mapping->host; struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); ssize_t rc; rc = netfs_start_io_write(inode); @@ -2766,12 +2771,16 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) if (rc <= 0) goto out; - if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) && + (cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), server->vals->exclusive_lock_type, 0, - NULL, CIFS_WRITE_OP)) - rc = netfs_buffered_write_iter_locked(iocb, from, NULL); - else + NULL, CIFS_WRITE_OP))) { rc = -EACCES; + goto out; + } + + rc = netfs_buffered_write_iter_locked(iocb, from, NULL); + out: up_read(&cinode->lock_sem); netfs_end_io_write(inode); diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index c3ee42188d25..c769f9dbc0b4 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1216,6 +1216,8 @@ struct create_context { ); __u8 Buffer[]; } __packed; +static_assert(offsetof(struct create_context, Buffer) == sizeof(struct create_context_hdr), + "struct member likely outside of __struct_group()"); struct smb2_create_req { struct smb2_hdr hdr; diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 09e1e7771592..7889df8112b4 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -165,11 +165,43 @@ void ksmbd_all_conn_set_status(u64 sess_id, u32 status) up_read(&conn_list_lock); } -void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id) +void ksmbd_conn_wait_idle(struct ksmbd_conn *conn) { wait_event(conn->req_running_q, atomic_read(&conn->req_running) < 2); } +int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id) +{ + struct ksmbd_conn *conn; + int rc, retry_count = 0, max_timeout = 120; + int rcount = 1; + +retry_idle: + if (retry_count >= max_timeout) + return -EIO; + + down_read(&conn_list_lock); + list_for_each_entry(conn, &conn_list, conns_list) { + if (conn->binding || xa_load(&conn->sessions, sess_id)) { + if (conn == curr_conn) + rcount = 2; + if (atomic_read(&conn->req_running) >= rcount) { + rc = wait_event_timeout(conn->req_running_q, + atomic_read(&conn->req_running) < rcount, + HZ); + if (!rc) { + up_read(&conn_list_lock); + retry_count++; + goto retry_idle; + } + } + } + } + up_read(&conn_list_lock); + + return 0; +} + int ksmbd_conn_write(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 5c2845e47cf2..5b947175c048 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -145,7 +145,8 @@ extern struct list_head conn_list; extern struct rw_semaphore conn_list_lock; bool ksmbd_conn_alive(struct ksmbd_conn *conn); -void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id); +void ksmbd_conn_wait_idle(struct ksmbd_conn *conn); +int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id); struct ksmbd_conn *ksmbd_conn_alloc(void); void ksmbd_conn_free(struct ksmbd_conn *conn); bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c); diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 162a12685d2c..99416ce9f501 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -311,6 +311,7 @@ void destroy_previous_session(struct ksmbd_conn *conn, { struct ksmbd_session *prev_sess; struct ksmbd_user *prev_user; + int err; down_write(&sessions_table_lock); down_write(&conn->session_lock); @@ -325,8 +326,16 @@ void destroy_previous_session(struct ksmbd_conn *conn, memcmp(user->passkey, prev_user->passkey, user->passkey_sz)) goto out; + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_RECONNECT); + err = ksmbd_conn_wait_idle_sess_id(conn, id); + if (err) { + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE); + goto out; + } + ksmbd_destroy_file_table(&prev_sess->file_table); prev_sess->state = SMB2_SESSION_EXPIRED; + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE); ksmbd_launch_ksmbd_durable_scavenger(); out: up_write(&conn->session_lock); diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 2df1354288e6..0bc9edf22ba4 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1370,7 +1370,8 @@ static int ntlm_negotiate(struct ksmbd_work *work, } sz = le16_to_cpu(rsp->SecurityBufferOffset); - memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len); + unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len, + /* alloc is larger than blob, see smb2_allocate_rsp_buf() */); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); out: @@ -1453,7 +1454,9 @@ static int ntlm_authenticate(struct ksmbd_work *work, return -ENOMEM; sz = le16_to_cpu(rsp->SecurityBufferOffset); - memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len); + unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, + spnego_blob_len, + /* alloc is larger than blob, see smb2_allocate_rsp_buf() */); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); kfree(spnego_blob); } @@ -2210,7 +2213,7 @@ int smb2_session_logoff(struct ksmbd_work *work) ksmbd_conn_unlock(conn); ksmbd_close_session_fds(work); - ksmbd_conn_wait_idle(conn, sess_id); + ksmbd_conn_wait_idle(conn); /* * Re-lookup session to validate if session is deleted @@ -5357,7 +5360,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, "NTFS", PATH_MAX, conn->local_nls, 0); len = len * 2; info->FileSystemNameLen = cpu_to_le32(len); - sz = sizeof(struct filesystem_attribute_info) - 2 + len; + sz = sizeof(struct filesystem_attribute_info) + len; rsp->OutputBufferLength = cpu_to_le32(sz); break; } @@ -5383,7 +5386,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, len = len * 2; info->VolumeLabelSize = cpu_to_le32(len); info->Reserved = 0; - sz = sizeof(struct filesystem_vol_info) - 2 + len; + sz = sizeof(struct filesystem_vol_info) + len; rsp->OutputBufferLength = cpu_to_le32(sz); break; } diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index 4a3148b0167f..cc1d6dfe29d5 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -213,7 +213,7 @@ struct filesystem_attribute_info { __le32 Attributes; __le32 MaxPathNameComponentLength; __le32 FileSystemNameLen; - __le16 FileSystemName[1]; /* do not have to save this - get subset? */ + __le16 FileSystemName[]; /* do not have to save this - get subset? */ } __packed; struct filesystem_device_info { @@ -226,7 +226,7 @@ struct filesystem_vol_info { __le32 SerialNumber; __le32 VolumeLabelSize; __le16 Reserved; - __le16 VolumeLabel[1]; + __le16 VolumeLabel[]; } __packed; struct filesystem_info { diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 24a15bf784f1..5ab2ac53c920 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -938,7 +938,13 @@ xchk_bmap( } break; case XFS_ATTR_FORK: - if (!xfs_has_attr(mp) && !xfs_has_attr2(mp)) + /* + * "attr" means that an attr fork was created at some point in + * the life of this filesystem. "attr2" means that inodes have + * variable-sized data/attr fork areas. Hence we only check + * attr here. + */ + if (!xfs_has_attr(mp)) xchk_ino_set_corrupt(sc, sc->ip->i_ino); break; default: diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 4e933db75b12..6b13666d4e96 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -483,6 +483,17 @@ xfs_ioctl_setattr_xflags( /* Can't change realtime flag if any extents are allocated. */ if (ip->i_df.if_nextents || ip->i_delayed_blks) return -EINVAL; + + /* + * If S_DAX is enabled on this file, we can only switch the + * device if both support fsdax. We can't update S_DAX because + * there might be other threads walking down the access paths. + */ + if (IS_DAX(VFS_I(ip)) && + (mp->m_ddev_targp->bt_daxdev == NULL || + (mp->m_rtdev_targp && + mp->m_rtdev_targp->bt_daxdev == NULL))) + return -EINVAL; } if (rtflag) { diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 0fafcc9f3dbe..8ede9d099d1f 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -644,7 +644,12 @@ xfsaild( set_freezable(); while (1) { - if (tout) + /* + * Long waits of 50ms or more occur when we've run out of items + * to push, so we only want uninterruptible state if we're + * actually blocked on something. + */ + if (tout && tout <= 20) set_current_state(TASK_KILLABLE|TASK_FREEZABLE); else set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 80dc36f9d527..9f1c1d225e32 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -660,13 +660,10 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status void *context)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_execute_reg_methods(acpi_handle device, + u32 nax_depth, acpi_adr_space_type space_id)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status - acpi_execute_orphan_reg_method(acpi_handle device, - acpi_adr_space_type - space_id)) -ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_remove_address_space_handler(acpi_handle device, acpi_adr_space_type diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index dca2969015d8..6fbfbde68a37 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -5,6 +5,8 @@ #ifndef _NET_DSA_TAG_OCELOT_H #define _NET_DSA_TAG_OCELOT_H +#include <linux/if_bridge.h> +#include <linux/if_vlan.h> #include <linux/kthread.h> #include <linux/packing.h> #include <linux/skbuff.h> @@ -273,4 +275,49 @@ static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb) return rew_op; } +/** + * ocelot_xmit_get_vlan_info: Determine VLAN_TCI and TAG_TYPE for injected frame + * @skb: Pointer to socket buffer + * @br: Pointer to bridge device that the port is under, if any + * @vlan_tci: + * @tag_type: + * + * If the port is under a VLAN-aware bridge, remove the VLAN header from the + * payload and move it into the DSA tag, which will make the switch classify + * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, + * which is the pvid of standalone ports (OCELOT_STANDALONE_PVID), although not + * of VLAN-unaware bridge ports (that would be ocelot_vlan_unaware_pvid()). + * Anyway, VID 0 is fine because it is stripped on egress for these port modes, + * and source address learning is not performed for packets injected from the + * CPU anyway, so it doesn't matter that the VID is "wrong". + */ +static inline void ocelot_xmit_get_vlan_info(struct sk_buff *skb, + struct net_device *br, + u64 *vlan_tci, u64 *tag_type) +{ + struct vlan_ethhdr *hdr; + u16 proto, tci; + + if (!br || !br_vlan_enabled(br)) { + *vlan_tci = 0; + *tag_type = IFH_TAG_TYPE_C; + return; + } + + hdr = (struct vlan_ethhdr *)skb_mac_header(skb); + br_vlan_get_proto(br, &proto); + + if (ntohs(hdr->h_vlan_proto) == proto) { + vlan_remove_tag(skb, &tci); + *vlan_tci = tci; + } else { + rcu_read_lock(); + br_vlan_get_pvid_rcu(br, &tci); + rcu_read_unlock(); + *vlan_tci = tci; + } + + *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; +} + #endif diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c9bf68c239a0..45bf05ad5c53 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -944,10 +944,37 @@ static inline bool htlb_allow_alloc_fallback(int reason) static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { - if (huge_page_size(h) == PMD_SIZE) + const unsigned long size = huge_page_size(h); + + VM_WARN_ON(size == PAGE_SIZE); + + /* + * hugetlb must use the exact same PT locks as core-mm page table + * walkers would. When modifying a PTE table, hugetlb must take the + * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD + * PT lock etc. + * + * The expectation is that any hugetlb folio smaller than a PMD is + * always mapped into a single PTE table and that any hugetlb folio + * smaller than a PUD (but at least as big as a PMD) is always mapped + * into a single PMD table. + * + * If that does not hold for an architecture, then that architecture + * must disable split PT locks such that all *_lockptr() functions + * will give us the same result: the per-MM PT lock. + * + * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where + * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr() + * and core-mm would use pmd_lockptr(). However, in such configurations + * split PMD locks are disabled -- they don't make sense on a single + * PGDIR page table -- and the end result is the same. + */ + if (size >= PUD_SIZE) + return pud_lockptr(mm, (pud_t *) pte); + else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE)) return pmd_lockptr(mm, (pmd_t *) pte); - VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); - return &mm->page_table_lock; + /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */ + return ptep_lockptr(mm, pte); } #ifndef hugepages_supported diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 7eedd0c662da..377def497298 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -1066,7 +1066,7 @@ static inline int of_i2c_get_board_info(struct device *dev, struct acpi_resource; struct acpi_resource_i2c_serialbus; -#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_I2C) +#if IS_REACHABLE(CONFIG_ACPI) && IS_REACHABLE(CONFIG_I2C) bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4d47f2c33311..04cbdae0052e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -795,8 +795,6 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76..6549d0979b28 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2920,6 +2920,13 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE)); + BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE); + return ptlock_ptr(virt_to_ptdesc(pte)); +} + static inline bool ptlock_init(struct ptdesc *ptdesc) { /* @@ -2944,6 +2951,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + return &mm->page_table_lock; +} static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 41458892bc8a..1dc6248feb83 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -220,8 +220,6 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, - NR_MEMMAP, /* page metadata allocated through buddy allocator */ - NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */ NR_VM_NODE_STAT_ITEMS }; diff --git a/include/linux/panic.h b/include/linux/panic.h index 3130e0b5116b..54d90b6c5f47 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -16,6 +16,7 @@ extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); +extern bool panic_triggering_all_cpu_backtrace; extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 18cd0c0c73d9..207f0c83c8e9 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -43,6 +43,18 @@ static inline void put_page_tag_ref(union codetag_ref *ref) page_ext_put(page_ext_from_codetag_ref(ref)); } +static inline void clear_page_tag_ref(struct page *page) +{ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + set_codetag_empty(ref); + put_page_tag_ref(ref); + } + } +} + static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) { @@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; } static inline void put_page_tag_ref(union codetag_ref *ref) {} +static inline void clear_page_tag_ref(struct page *page) {} static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 59b3b752394d..35f039ecb272 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -266,12 +266,12 @@ bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) if (oldp) *oldp = old; - if (old == i) { + if (old > 0 && old == i) { smp_acquire__after_ctrl_dep(); return true; } - if (unlikely(old < 0 || old - i < 0)) + if (unlikely(old <= 0 || old - i < 0)) refcount_warn_saturate(r, REFCOUNT_SUB_UAF); return false; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e4f3f3d30a03..d47d5f14ff99 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -902,12 +902,29 @@ extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); -#if IS_ENABLED(CONFIG_ACPI) +#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SPI_MASTER) extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); int acpi_spi_count_resources(struct acpi_device *adev); +#else +static inline struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev) +{ + return NULL; +} + +static inline struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, + struct acpi_device *adev, + int index) +{ + return ERR_PTR(-ENODEV); +} + +static inline int acpi_spi_count_resources(struct acpi_device *adev) +{ + return 0; +} #endif /* diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 25fbf960b474..b86ddca46b9e 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -55,6 +55,7 @@ enum thermal_notify_event { THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the thermal zone */ THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */ THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */ + THERMAL_TZ_RESUME, /* Thermal zone is resuming after system sleep */ }; /** diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 23cd17942036..9eb77c9007e6 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -34,10 +34,13 @@ struct reclaim_stat { unsigned nr_lazyfree_fail; }; -enum writeback_stat_item { +/* Stat data for system wide items */ +enum vm_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, - NR_VM_WRITEBACK_STAT_ITEMS, + NR_MEMMAP_PAGES, /* page metadata allocated through buddy allocator */ + NR_MEMMAP_BOOT_PAGES, /* page metadata allocated through boot allocator */ + NR_VM_STAT_ITEMS, }; #ifdef CONFIG_VM_EVENT_COUNTERS @@ -514,21 +517,13 @@ static inline const char *lru_list_name(enum lru_list lru) return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } -static inline const char *writeback_stat_name(enum writeback_stat_item item) -{ - return vmstat_text[NR_VM_ZONE_STAT_ITEMS + - NR_VM_NUMA_EVENT_ITEMS + - NR_VM_NODE_STAT_ITEMS + - item]; -} - #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + NR_VM_NODE_STAT_ITEMS + - NR_VM_WRITEBACK_STAT_ITEMS + + NR_VM_STAT_ITEMS + item]; } #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ @@ -625,7 +620,6 @@ static inline void lruvec_stat_sub_folio(struct folio *folio, lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } -void __meminit mod_node_early_perpage_metadata(int nid, long delta); -void __meminit store_early_perpage_metadata(void); - +void memmap_boot_pages_add(long delta); +void memmap_pages_add(long delta); #endif /* _LINUX_VMSTAT_H */ diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index e372a88e8c3f..d1d073089f38 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -206,14 +206,17 @@ enum { */ HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, - /* When this quirk is set, the controller has validated that - * LE states reported through the HCI_LE_READ_SUPPORTED_STATES are - * valid. This mechanism is necessary as many controllers have - * been seen has having trouble initiating a connectable - * advertisement despite the state combination being reported as - * supported. + /* When this quirk is set, the LE states reported through the + * HCI_LE_READ_SUPPORTED_STATES are invalid/broken. + * + * This mechanism is necessary as many controllers have been seen has + * having trouble initiating a connectable advertisement despite the + * state combination being reported as supported. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. */ - HCI_QUIRK_VALID_LE_STATES, + HCI_QUIRK_BROKEN_LE_STATES, /* When this quirk is set, then erroneous data reporting * is ignored. This is mainly due to the fact that the HCI diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 31020891fc68..e449dba698f3 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -825,7 +825,7 @@ extern struct mutex hci_cb_list_lock; } while (0) #define hci_dev_le_state_simultaneous(hdev) \ - (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \ + (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) && \ (hdev->le_states[4] & 0x08) && /* Central */ \ (hdev->le_states[4] & 0x40) && /* Peripheral */ \ (hdev->le_states[3] & 0x10)) /* Simultaneous */ diff --git a/include/net/dsa.h b/include/net/dsa.h index b06f97ae3da1..d7a6c2930277 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -403,14 +403,18 @@ struct dsa_switch { */ u32 configure_vlan_while_not_filtering:1; - /* If the switch driver always programs the CPU port as egress tagged - * despite the VLAN configuration indicating otherwise, then setting - * @untag_bridge_pvid will force the DSA receive path to pop the - * bridge's default_pvid VLAN tagged frames to offer a consistent - * behavior between a vlan_filtering=0 and vlan_filtering=1 bridge - * device. + /* Pop the default_pvid of VLAN-unaware bridge ports from tagged frames. + * DEPRECATED: Do NOT set this field in new drivers. Instead look at + * the dsa_software_vlan_untag() comments. */ u32 untag_bridge_pvid:1; + /* Pop the default_pvid of VLAN-aware bridge ports from tagged frames. + * Useful if the switch cannot preserve the VLAN tag as seen on the + * wire for user port ingress, and chooses to send all frames as + * VLAN-tagged to the CPU, including those which were originally + * untagged. + */ + u32 untag_vlan_aware_bridge_pvid:1; /* Let DSA manage the FDB entries towards the * CPU, based on the software bridge database. diff --git a/include/net/kcm.h b/include/net/kcm.h index 90279e5e09a5..441e993be634 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -70,6 +70,7 @@ struct kcm_sock { struct work_struct tx_work; struct list_head wait_psock_list; struct sk_buff *seq_skb; + struct mutex tx_mutex; u32 tx_stopped : 1; /* Don't use bit fields here, these are set under different locks */ diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 6a37b29f4b4c..462c653e1017 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -813,6 +813,9 @@ struct ocelot { const u32 *const *map; struct list_head stats_regions; + spinlock_t inj_lock; + spinlock_t xtr_lock; + u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; int packet_buffer_size; int num_frame_refs; @@ -966,10 +969,17 @@ void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, u32 val, u32 reg, u32 offset); /* Packet I/O */ +void ocelot_lock_inj_grp(struct ocelot *ocelot, int grp); +void ocelot_unlock_inj_grp(struct ocelot *ocelot, int grp); +void ocelot_lock_xtr_grp(struct ocelot *ocelot, int grp); +void ocelot_unlock_xtr_grp(struct ocelot *ocelot, int grp); +void ocelot_lock_xtr_grp_bh(struct ocelot *ocelot, int grp); +void ocelot_unlock_xtr_grp_bh(struct ocelot *ocelot, int grp); bool ocelot_can_inject(struct ocelot *ocelot, int grp); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb); -void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag); +void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, + u32 rew_op, struct sk_buff *skb); int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp); void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb, diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index c601a4598b0d..eb19668a06db 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -13,6 +13,7 @@ */ #define OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port, upstream) ((upstream) << 16 | (port)) #define OCELOT_VCAP_IS1_TAG_8021Q_TXVLAN(ocelot, port) (port) +#define OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_TAG_8021Q_TXVLAN(ocelot, port) (port) #define OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_MRP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2) @@ -499,6 +500,7 @@ struct ocelot_vcap_key_vlan { struct ocelot_vcap_u8 pcp; /* PCP (3 bit) */ enum ocelot_vcap_bit dei; /* DEI */ enum ocelot_vcap_bit tagged; /* Tagged/untagged frame */ + enum ocelot_vcap_bit tpid; }; struct ocelot_vcap_key_etype { diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 2aaf7ee256ac..adc2524fd8e3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -421,7 +421,7 @@ enum io_uring_msg_ring_flags { * IO completion data structure (Completion Queue Entry) */ struct io_uring_cqe { - __u64 user_data; /* sqe->data submission passed back */ + __u64 user_data; /* sqe->user_data value passed back */ __s32 res; /* result code for this event */ __u32 flags; diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 91583690bddc..f33d914d8f46 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -8,14 +8,11 @@ #define FASTRPC_IOCTL_ALLOC_DMA_BUFF _IOWR('R', 1, struct fastrpc_alloc_dma_buf) #define FASTRPC_IOCTL_FREE_DMA_BUFF _IOWR('R', 2, __u32) #define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_invoke) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH _IO('R', 4) #define FASTRPC_IOCTL_INIT_CREATE _IOWR('R', 5, struct fastrpc_init_create) #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static) #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) diff --git a/init/Kconfig b/init/Kconfig index 37260d17267e..5783a0b87517 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1920,7 +1920,7 @@ config RUST config RUSTC_VERSION_TEXT string depends on RUST - default $(shell,command -v $(RUSTC) >/dev/null 2>&1 && $(RUSTC) --version || echo n) + default "$(shell,$(RUSTC) --version 2>/dev/null)" config BINDGEN_VERSION_TEXT string @@ -1928,7 +1928,7 @@ config BINDGEN_VERSION_TEXT # The dummy parameter `workaround-for-0.69.0` is required to support 0.69.0 # (https://github.com/rust-lang/rust-bindgen/pull/2678). It can be removed when # the minimum version is upgraded past that (0.69.1 already fixed the issue). - default $(shell,command -v $(BINDGEN) >/dev/null 2>&1 && $(BINDGEN) --version workaround-for-0.69.0 || echo n) + default "$(shell,$(BINDGEN) --version workaround-for-0.69.0 2>/dev/null)" # # Place an empty function call at each tracepoint site. Can be diff --git a/io_uring/napi.c b/io_uring/napi.c index a3dc3762008f..1de1d4d62925 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -26,7 +26,6 @@ static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list, hlist_for_each_entry_rcu(e, hash_list, node) { if (e->napi_id != napi_id) continue; - e->timeout = jiffies + NAPI_TIMEOUT; return e; } @@ -302,7 +301,7 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll); - if (!(ctx->flags & IORING_SETUP_SQPOLL) && ctx->napi_enabled) + if (!(ctx->flags & IORING_SETUP_SQPOLL)) io_napi_blocking_busy_loop(ctx, iowq); } diff --git a/io_uring/napi.h b/io_uring/napi.h index 88f1c21d5548..27b88c3eb428 100644 --- a/io_uring/napi.h +++ b/io_uring/napi.h @@ -55,7 +55,7 @@ static inline void io_napi_add(struct io_kiocb *req) struct io_ring_ctx *ctx = req->ctx; struct socket *sock; - if (!READ_ONCE(ctx->napi_busy_poll_dt)) + if (!READ_ONCE(ctx->napi_enabled)) return; sock = sock_from_file(req->file); diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index b3722e5275e7..3b50dc9586d1 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -44,7 +44,7 @@ void io_sq_thread_unpark(struct io_sq_data *sqd) void io_sq_thread_park(struct io_sq_data *sqd) __acquires(&sqd->lock) { - WARN_ON_ONCE(sqd->thread == current); + WARN_ON_ONCE(data_race(sqd->thread) == current); atomic_inc(&sqd->park_pending); set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); diff --git a/kernel/cpu.c b/kernel/cpu.c index 1209ddaec026..b1fd2a3db91a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2689,6 +2689,16 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) return ret; } +/** + * Check if the core a CPU belongs to is online + */ +#if !defined(topology_is_core_online) +static inline bool topology_is_core_online(unsigned int cpu) +{ + return true; +} +#endif + int cpuhp_smt_enable(void) { int cpu, ret = 0; @@ -2699,7 +2709,7 @@ int cpuhp_smt_enable(void) /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) continue; - if (!cpu_smt_thread_allowed(cpu)) + if (!cpu_smt_thread_allowed(cpu) || !topology_is_core_online(cpu)) continue; ret = _cpu_up(cpu, 0, CPUHP_ONLINE); if (ret) diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index d3b4cd12bdd1..64d44a52c011 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -423,7 +423,8 @@ retry: if (high && search_end == CRASH_ADDR_HIGH_MAX) { search_end = CRASH_ADDR_LOW_MAX; search_base = 0; - goto retry; + if (search_end != CRASH_ADDR_HIGH_MAX) + goto retry; } pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index fb2c77368d18..a9a0ca605d4a 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -160,38 +160,6 @@ unsigned long kallsyms_sym_address(int idx) return kallsyms_relative_base - 1 - kallsyms_offsets[idx]; } -static void cleanup_symbol_name(char *s) -{ - char *res; - - if (!IS_ENABLED(CONFIG_LTO_CLANG)) - return; - - /* - * LLVM appends various suffixes for local functions and variables that - * must be promoted to global scope as part of LTO. This can break - * hooking of static functions with kprobes. '.' is not a valid - * character in an identifier in C. Suffixes only in LLVM LTO observed: - * - foo.llvm.[0-9a-f]+ - */ - res = strstr(s, ".llvm."); - if (res) - *res = '\0'; - - return; -} - -static int compare_symbol_name(const char *name, char *namebuf) -{ - /* The kallsyms_seqs_of_names is sorted based on names after - * cleanup_symbol_name() (see scripts/kallsyms.c) if clang lto is enabled. - * To ensure correct bisection in kallsyms_lookup_names(), do - * cleanup_symbol_name(namebuf) before comparing name and namebuf. - */ - cleanup_symbol_name(namebuf); - return strcmp(name, namebuf); -} - static unsigned int get_symbol_seq(int index) { unsigned int i, seq = 0; @@ -219,7 +187,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(mid); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - ret = compare_symbol_name(name, namebuf); + ret = strcmp(name, namebuf); if (ret > 0) low = mid + 1; else if (ret < 0) @@ -236,7 +204,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(low - 1); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - if (compare_symbol_name(name, namebuf)) + if (strcmp(name, namebuf)) break; low--; } @@ -248,7 +216,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(high + 1); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - if (compare_symbol_name(name, namebuf)) + if (strcmp(name, namebuf)) break; high++; } @@ -407,8 +375,7 @@ static int kallsyms_lookup_buildid(unsigned long addr, if (modbuildid) *modbuildid = NULL; - ret = strlen(namebuf); - goto found; + return strlen(namebuf); } /* See if it's in a module or a BPF JITed image. */ @@ -422,8 +389,6 @@ static int kallsyms_lookup_buildid(unsigned long addr, ret = ftrace_mod_address_lookup(addr, symbolsize, offset, modname, namebuf); -found: - cleanup_symbol_name(namebuf); return ret; } @@ -450,8 +415,6 @@ const char *kallsyms_lookup(unsigned long addr, int lookup_symbol_name(unsigned long addr, char *symname) { - int res; - symname[0] = '\0'; symname[KSYM_NAME_LEN - 1] = '\0'; @@ -462,16 +425,10 @@ int lookup_symbol_name(unsigned long addr, char *symname) /* Grab name */ kallsyms_expand_symbol(get_symbol_offset(pos), symname, KSYM_NAME_LEN); - goto found; + return 0; } /* See if it's in a module. */ - res = lookup_module_symbol_name(addr, symname); - if (res) - return res; - -found: - cleanup_symbol_name(symname); - return 0; + return lookup_module_symbol_name(addr, symname); } /* Look up a kernel symbol and return it in a text buffer. */ diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c index 2f84896a7bcb..873f7c445488 100644 --- a/kernel/kallsyms_selftest.c +++ b/kernel/kallsyms_selftest.c @@ -187,31 +187,11 @@ static void test_perf_kallsyms_lookup_name(void) stat.min, stat.max, div_u64(stat.sum, stat.real_cnt)); } -static bool match_cleanup_name(const char *s, const char *name) -{ - char *p; - int len; - - if (!IS_ENABLED(CONFIG_LTO_CLANG)) - return false; - - p = strstr(s, ".llvm."); - if (!p) - return false; - - len = strlen(name); - if (p - s != len) - return false; - - return !strncmp(s, name, len); -} - static int find_symbol(void *data, const char *name, unsigned long addr) { struct test_stat *stat = (struct test_stat *)data; - if (strcmp(name, stat->name) == 0 || - (!stat->perf && match_cleanup_name(name, stat->name))) { + if (!strcmp(name, stat->name)) { stat->real_cnt++; stat->addr = addr; diff --git a/kernel/panic.c b/kernel/panic.c index f861bedc1925..2a0449144f82 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -64,6 +64,8 @@ unsigned long panic_on_taint; bool panic_on_taint_nousertaint = false; static unsigned int warn_limit __read_mostly; +bool panic_triggering_all_cpu_backtrace; + int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); @@ -253,8 +255,12 @@ void check_panic_on_warn(const char *origin) */ static void panic_other_cpus_shutdown(bool crash_kexec) { - if (panic_print & PANIC_PRINT_ALL_CPU_BT) + if (panic_print & PANIC_PRINT_ALL_CPU_BT) { + /* Temporary allow non-panic CPUs to write their backtraces. */ + panic_triggering_all_cpu_backtrace = true; trigger_all_cpu_backtrace(); + panic_triggering_all_cpu_backtrace = false; + } /* * Note that smp_send_stop() is the usual SMP shutdown function, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 054c0e7784fd..c22b07049c38 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2316,7 +2316,7 @@ asmlinkage int vprintk_emit(int facility, int level, * non-panic CPUs are generating any messages, they will be * silently dropped. */ - if (other_cpu_in_panic()) + if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace) return 0; if (level == LOGLEVEL_SCHED) { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 10cd38bce2f1..ebe7ce2f5f4a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7956,7 +7956,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, trace_access_unlock(iter->cpu_file); if (ret < 0) { - if (trace_empty(iter)) { + if (trace_empty(iter) && !iter->closed) { if ((filp->f_flags & O_NONBLOCK)) return -EAGAIN; diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c index aaefb9b678c8..fa692c86f069 100644 --- a/lib/generic-radix-tree.c +++ b/lib/generic-radix-tree.c @@ -121,6 +121,8 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) { v = new_root; new_node = NULL; + } else { + new_node->children[0] = NULL; } } diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index f314a0c15a6d..2abc78367dd1 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -668,7 +668,6 @@ DEFINE_TEST_ALLOC(devm_kzalloc, devm_kfree, 1, 1, 0); static void overflow_allocation_test(struct kunit *test) { - const char device_name[] = "overflow-test"; struct device *dev; int count = 0; @@ -678,7 +677,7 @@ static void overflow_allocation_test(struct kunit *test) } while (0) /* Create dummy device for devm_kmalloc()-family tests. */ - dev = kunit_device_register(test, device_name); + dev = kunit_device_register(test, "overflow-test"); KUNIT_ASSERT_FALSE_MSG(test, IS_ERR(dev), "Cannot register test device\n"); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f4be468e06a4..67c86a5d64a6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1685,7 +1685,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { spin_unlock(vmf->ptl); - goto out; + return 0; } pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1728,22 +1728,16 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) if (!migrate_misplaced_folio(folio, vma, target_nid)) { flags |= TNF_MIGRATED; nid = target_nid; - } else { - flags |= TNF_MIGRATE_FAIL; - vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); - if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { - spin_unlock(vmf->ptl); - goto out; - } - goto out_map; - } - -out: - if (nid != NUMA_NO_NODE) task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; + } - return 0; - + flags |= TNF_MIGRATE_FAIL; + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { + spin_unlock(vmf->ptl); + return 0; + } out_map: /* Restore the PMD */ pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1753,7 +1747,10 @@ out_map: set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); spin_unlock(vmf->ptl); - goto out; + + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; } /* diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 829112b0a914..0c3f56b3578e 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -185,11 +185,11 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end, static inline void free_vmemmap_page(struct page *page) { if (PageReserved(page)) { + memmap_boot_pages_add(-1); free_bootmem_page(page); - mod_node_page_state(page_pgdat(page), NR_MEMMAP_BOOT, -1); } else { + memmap_pages_add(-1); __free_page(page); - mod_node_page_state(page_pgdat(page), NR_MEMMAP, -1); } } @@ -341,7 +341,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, copy_page(page_to_virt(walk.reuse_page), (void *)walk.reuse_addr); list_add(&walk.reuse_page->lru, vmemmap_pages); - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, 1); + memmap_pages_add(1); } /* @@ -392,14 +392,11 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end, for (i = 0; i < nr_pages; i++) { page = alloc_pages_node(nid, gfp_mask, 0); - if (!page) { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, i); + if (!page) goto out; - } list_add(&page->lru, list); } - - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, nr_pages); + memmap_pages_add(nr_pages); return 0; out: diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 2aeea4d8bf8e..417c96f2da28 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -1842,9 +1842,12 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, buf = endp + 1; cfd = simple_strtoul(buf, &endp, 10); - if ((*endp != ' ') && (*endp != '\0')) + if (*endp == '\0') + buf = endp; + else if (*endp == ' ') + buf = endp + 1; + else return -EINVAL; - buf = endp + 1; event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 581d3e5c9117..7066fc84f351 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2417,7 +2417,7 @@ struct memory_failure_entry { struct memory_failure_cpu { DECLARE_KFIFO(fifo, struct memory_failure_entry, MEMORY_FAILURE_FIFO_SIZE); - spinlock_t lock; + raw_spinlock_t lock; struct work_struct work; }; @@ -2443,20 +2443,22 @@ void memory_failure_queue(unsigned long pfn, int flags) { struct memory_failure_cpu *mf_cpu; unsigned long proc_flags; + bool buffer_overflow; struct memory_failure_entry entry = { .pfn = pfn, .flags = flags, }; mf_cpu = &get_cpu_var(memory_failure_cpu); - spin_lock_irqsave(&mf_cpu->lock, proc_flags); - if (kfifo_put(&mf_cpu->fifo, entry)) + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); + buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry); + if (!buffer_overflow) schedule_work_on(smp_processor_id(), &mf_cpu->work); - else + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + put_cpu_var(memory_failure_cpu); + if (buffer_overflow) pr_err("buffer overflow when queuing memory failure at %#lx\n", pfn); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); - put_cpu_var(memory_failure_cpu); } EXPORT_SYMBOL_GPL(memory_failure_queue); @@ -2469,9 +2471,9 @@ static void memory_failure_work_func(struct work_struct *work) mf_cpu = container_of(work, struct memory_failure_cpu, work); for (;;) { - spin_lock_irqsave(&mf_cpu->lock, proc_flags); + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); gotten = kfifo_get(&mf_cpu->fifo, &entry); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); if (!gotten) break; if (entry.flags & MF_SOFT_OFFLINE) @@ -2501,7 +2503,7 @@ static int __init memory_failure_init(void) for_each_possible_cpu(cpu) { mf_cpu = &per_cpu(memory_failure_cpu, cpu); - spin_lock_init(&mf_cpu->lock); + raw_spin_lock_init(&mf_cpu->lock); INIT_KFIFO(mf_cpu->fifo); INIT_WORK(&mf_cpu->work, memory_failure_work_func); } diff --git a/mm/memory.c b/mm/memory.c index 34f8402d2046..3c01d68065be 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5295,7 +5295,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + return 0; } pte = pte_modify(old_pte, vma->vm_page_prot); @@ -5358,23 +5358,19 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (!migrate_misplaced_folio(folio, vma, target_nid)) { nid = target_nid; flags |= TNF_MIGRATED; - } else { - flags |= TNF_MIGRATE_FAIL; - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, - vmf->address, &vmf->ptl); - if (unlikely(!vmf->pte)) - goto out; - if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { - pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; - } - goto out_map; + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } -out: - if (nid != NUMA_NO_NODE) - task_numa_fault(last_cpupid, nid, nr_pages, flags); - return 0; + flags |= TNF_MIGRATE_FAIL; + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); + if (unlikely(!vmf->pte)) + return 0; + if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { + pte_unmap_unlock(vmf->pte, vmf->ptl); + return 0; + } out_map: /* * Make it present again, depending on how arch implements @@ -5387,7 +5383,10 @@ out_map: numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, writable); pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) diff --git a/mm/migrate.c b/mm/migrate.c index e7296c0fb5d5..923ea80ba744 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1479,11 +1479,17 @@ out: return rc; } -static inline int try_split_folio(struct folio *folio, struct list_head *split_folios) +static inline int try_split_folio(struct folio *folio, struct list_head *split_folios, + enum migrate_mode mode) { int rc; - folio_lock(folio); + if (mode == MIGRATE_ASYNC) { + if (!folio_trylock(folio)) + return -EAGAIN; + } else { + folio_lock(folio); + } rc = split_folio_to_list(folio, split_folios); folio_unlock(folio); if (!rc) @@ -1677,7 +1683,7 @@ static int migrate_pages_batch(struct list_head *from, */ if (nr_pages > 2 && !list_empty(&folio->_deferred_list)) { - if (try_split_folio(folio, split_folios) == 0) { + if (!try_split_folio(folio, split_folios, mode)) { nr_failed++; stats->nr_thp_failed += is_thp; stats->nr_thp_split += is_thp; @@ -1699,7 +1705,7 @@ static int migrate_pages_batch(struct list_head *from, if (!thp_migration_supported() && is_thp) { nr_failed++; stats->nr_thp_failed++; - if (!try_split_folio(folio, split_folios)) { + if (!try_split_folio(folio, split_folios, mode)) { stats->nr_thp_split++; stats->nr_split++; continue; @@ -1731,7 +1737,7 @@ static int migrate_pages_batch(struct list_head *from, stats->nr_thp_failed += is_thp; /* Large folio NUMA faulting doesn't split to retry. */ if (is_large && !nosplit) { - int ret = try_split_folio(folio, split_folios); + int ret = try_split_folio(folio, split_folios, mode); if (!ret) { stats->nr_thp_split += is_thp; diff --git a/mm/mm_init.c b/mm/mm_init.c index 75c3bd42799b..51960079875b 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1623,8 +1623,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) panic("Failed to allocate %ld bytes for node %d memory map\n", size, pgdat->node_id); pgdat->node_mem_map = map + offset; - mod_node_early_perpage_metadata(pgdat->node_id, - DIV_ROUND_UP(size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", __func__, pgdat->node_id, (unsigned long)pgdat, (unsigned long)pgdat->node_mem_map); @@ -2245,6 +2244,8 @@ void __init init_cma_reserved_pageblock(struct page *page) set_pageblock_migratetype(page, MIGRATE_CMA); set_page_refcounted(page); + /* pages were reserved and not allocated */ + clear_page_tag_ref(page); __free_pages(page, pageblock_order); adjust_managed_page_count(page, pageblock_nr_pages); @@ -2460,15 +2461,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, } /* pages were reserved and not allocated */ - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } - + clear_page_tag_ref(page); __free_pages_core(page, order, MEMINIT_EARLY); } diff --git a/mm/mseal.c b/mm/mseal.c index bf783bba8ed0..15bba28acc00 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -40,9 +40,17 @@ static bool can_modify_vma(struct vm_area_struct *vma) static bool is_madv_discard(int behavior) { - return behavior & - (MADV_FREE | MADV_DONTNEED | MADV_DONTNEED_LOCKED | - MADV_REMOVE | MADV_DONTFORK | MADV_WIPEONFORK); + switch (behavior) { + case MADV_FREE: + case MADV_DONTNEED: + case MADV_DONTNEED_LOCKED: + case MADV_REMOVE: + case MADV_DONTFORK: + case MADV_WIPEONFORK: + return true; + } + + return false; } static bool is_ro_anon(struct vm_area_struct *vma) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 28f80daf5c04..c565de8f48e9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes); static bool page_contains_unaccepted(struct page *page, unsigned int order); static void accept_page(struct page *page, unsigned int order); -static bool try_to_accept_memory(struct zone *zone, unsigned int order); +static bool cond_accept_memory(struct zone *zone, unsigned int order); static inline bool has_unaccepted_memory(void); static bool __free_unaccepted(struct page *page); @@ -3072,9 +3072,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z, if (!(alloc_flags & ALLOC_CMA)) unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES); #endif -#ifdef CONFIG_UNACCEPTED_MEMORY - unusable_free += zone_page_state(z, NR_UNACCEPTED); -#endif return unusable_free; } @@ -3368,6 +3365,8 @@ retry: } } + cond_accept_memory(zone, order); + /* * Detect whether the number of free pages is below high * watermark. If so, we will decrease pcp->high and free @@ -3393,10 +3392,8 @@ check_alloc_wmark: gfp_mask)) { int ret; - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* @@ -3450,10 +3447,8 @@ try_this_zone: return page; } else { - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* Try again if zone has deferred pages */ @@ -5755,7 +5750,6 @@ void __init setup_per_cpu_pageset(void) for_each_online_pgdat(pgdat) pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat); - store_early_perpage_metadata(); } __meminit void zone_pcp_init(struct zone *zone) @@ -5821,14 +5815,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char void free_reserved_page(struct page *page) { - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } + clear_page_tag_ref(page); ClearPageReserved(page); init_page_count(page); __free_page(page); @@ -6951,9 +6938,6 @@ static bool try_to_accept_memory_one(struct zone *zone) struct page *page; bool last; - if (list_empty(&zone->unaccepted_pages)) - return false; - spin_lock_irqsave(&zone->lock, flags); page = list_first_entry_or_null(&zone->unaccepted_pages, struct page, lru); @@ -6979,23 +6963,29 @@ static bool try_to_accept_memory_one(struct zone *zone) return true; } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { long to_accept; - int ret = false; + bool ret = false; + + if (!has_unaccepted_memory()) + return false; + + if (list_empty(&zone->unaccepted_pages)) + return false; /* How much to accept to get to high watermark? */ to_accept = high_wmark_pages(zone) - (zone_page_state(zone, NR_FREE_PAGES) - - __zone_watermark_unusable_free(zone, order, 0)); + __zone_watermark_unusable_free(zone, order, 0) - + zone_page_state(zone, NR_UNACCEPTED)); - /* Accept at least one page */ - do { + while (to_accept > 0) { if (!try_to_accept_memory_one(zone)) break; ret = true; to_accept -= MAX_ORDER_NR_PAGES; - } while (to_accept > 0); + } return ret; } @@ -7038,7 +7028,7 @@ static void accept_page(struct page *page, unsigned int order) { } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { return false; } diff --git a/mm/page_ext.c b/mm/page_ext.c index c191e490c401..641d93f6af4c 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -214,8 +214,7 @@ static int __init alloc_node_page_ext(int nid) return -ENOMEM; NODE_DATA(nid)->node_page_ext = base; total_usage += table_size; - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, - DIV_ROUND_UP(table_size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(table_size, PAGE_SIZE)); return 0; } @@ -275,10 +274,8 @@ static void *__meminit alloc_page_ext(size_t size, int nid) else addr = vzalloc_node(size, nid); - if (addr) { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, - DIV_ROUND_UP(size, PAGE_SIZE)); - } + if (addr) + memmap_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); return addr; } @@ -323,25 +320,18 @@ static void free_page_ext(void *addr) { size_t table_size; struct page *page; - struct pglist_data *pgdat; table_size = page_ext_size * PAGES_PER_SECTION; + memmap_pages_add(-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); if (is_vmalloc_addr(addr)) { - page = vmalloc_to_page(addr); - pgdat = page_pgdat(page); vfree(addr); } else { page = virt_to_page(addr); - pgdat = page_pgdat(page); BUG_ON(PageReserved(page)); kmemleak_free(addr); free_pages_exact(addr, table_size); } - - mod_node_page_state(pgdat, NR_MEMMAP, - -1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); - } static void __free_page_ext(unsigned long pfn) diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 1dda6c53370b..edcc7a6b0f6f 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -469,13 +469,10 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn, if (r < 0) return NULL; - if (system_state == SYSTEM_BOOTING) { - mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(end - start, - PAGE_SIZE)); - } else { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, - DIV_ROUND_UP(end - start, PAGE_SIZE)); - } + if (system_state == SYSTEM_BOOTING) + memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); + else + memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); return pfn_to_page(pfn); } diff --git a/mm/sparse.c b/mm/sparse.c index e4b830091d13..0f018c6f9ec5 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -463,7 +463,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid) sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true); sparsemap_buf_end = sparsemap_buf + size; #ifndef CONFIG_SPARSEMEM_VMEMMAP - mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); #endif } @@ -643,8 +643,7 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages, unsigned long start = (unsigned long) pfn_to_page(pfn); unsigned long end = start + nr_pages * sizeof(struct page); - mod_node_page_state(page_pgdat(pfn_to_page(pfn)), NR_MEMMAP, - -1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); + memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); vmemmap_free(start, end, altmap); } static void free_map_bootmem(struct page *memmap) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6b783baf12a1..af2de36549d6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3584,15 +3584,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid, page = alloc_pages_noprof(alloc_gfp, order); else page = alloc_pages_node_noprof(nid, alloc_gfp, order); - if (unlikely(!page)) { - if (!nofail) - break; - - /* fall back to the zero order allocations */ - alloc_gfp |= __GFP_NOFAIL; - order = 0; - continue; - } + if (unlikely(!page)) + break; /* * Higher order allocations must be able to be treated as diff --git a/mm/vmstat.c b/mm/vmstat.c index 04a1cb6cc636..e875f2a4915f 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1033,6 +1033,24 @@ unsigned long node_page_state(struct pglist_data *pgdat, } #endif +/* + * Count number of pages "struct page" and "struct page_ext" consume. + * nr_memmap_boot_pages: # of pages allocated by boot allocator + * nr_memmap_pages: # of pages that were allocated by buddy allocator + */ +static atomic_long_t nr_memmap_boot_pages = ATOMIC_LONG_INIT(0); +static atomic_long_t nr_memmap_pages = ATOMIC_LONG_INIT(0); + +void memmap_boot_pages_add(long delta) +{ + atomic_long_add(delta, &nr_memmap_boot_pages); +} + +void memmap_pages_add(long delta) +{ + atomic_long_add(delta, &nr_memmap_pages); +} + #ifdef CONFIG_COMPACTION struct contig_page_info { @@ -1255,11 +1273,11 @@ const char * const vmstat_text[] = { "pgdemote_kswapd", "pgdemote_direct", "pgdemote_khugepaged", - "nr_memmap", - "nr_memmap_boot", - /* enum writeback_stat_item counters */ + /* system-wide enum vm_stat_item counters */ "nr_dirty_threshold", "nr_dirty_background_threshold", + "nr_memmap_pages", + "nr_memmap_boot_pages", #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) /* enum vm_event_item counters */ @@ -1790,7 +1808,7 @@ static const struct seq_operations zoneinfo_op = { #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \ NR_VM_NUMA_EVENT_ITEMS + \ NR_VM_NODE_STAT_ITEMS + \ - NR_VM_WRITEBACK_STAT_ITEMS + \ + NR_VM_STAT_ITEMS + \ (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \ NR_VM_EVENT_ITEMS : 0)) @@ -1827,7 +1845,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD, v + NR_DIRTY_THRESHOLD); - v += NR_VM_WRITEBACK_STAT_ITEMS; + v[NR_MEMMAP_PAGES] = atomic_long_read(&nr_memmap_pages); + v[NR_MEMMAP_BOOT_PAGES] = atomic_long_read(&nr_memmap_boot_pages); + v += NR_VM_STAT_ITEMS; #ifdef CONFIG_VM_EVENT_COUNTERS all_vm_events(v); @@ -2285,25 +2305,3 @@ static int __init extfrag_debug_init(void) module_init(extfrag_debug_init); #endif - -/* - * Page metadata size (struct page and page_ext) in pages - */ -static unsigned long early_perpage_metadata[MAX_NUMNODES] __meminitdata; - -void __meminit mod_node_early_perpage_metadata(int nid, long delta) -{ - early_perpage_metadata[nid] += delta; -} - -void __meminit store_early_perpage_metadata(void) -{ - int nid; - struct pglist_data *pgdat; - - for_each_online_pgdat(pgdat) { - nid = pgdat->node_id; - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, - early_perpage_metadata[nid]); - } -} diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 06da8ac13dca..f25a21f532aa 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3664,19 +3664,19 @@ static void hci_sched_le(struct hci_dev *hdev) { struct hci_chan *chan; struct sk_buff *skb; - int quote, cnt, tmp; + int quote, *cnt, tmp; BT_DBG("%s", hdev->name); if (!hci_conn_num(hdev, LE_LINK)) return; - cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt; + cnt = hdev->le_pkts ? &hdev->le_cnt : &hdev->acl_cnt; - __check_timeout(hdev, cnt, LE_LINK); + __check_timeout(hdev, *cnt, LE_LINK); - tmp = cnt; - while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, "e))) { + tmp = *cnt; + while (*cnt && (chan = hci_chan_sent(hdev, LE_LINK, "e))) { u32 priority = (skb_peek(&chan->data_q))->priority; while (quote-- && (skb = skb_peek(&chan->data_q))) { BT_DBG("chan %p skb %p len %d priority %u", chan, skb, @@ -3691,7 +3691,7 @@ static void hci_sched_le(struct hci_dev *hdev) hci_send_frame(hdev, skb); hdev->le_last_tx = jiffies; - cnt--; + (*cnt)--; chan->sent++; chan->conn->sent++; @@ -3701,12 +3701,7 @@ static void hci_sched_le(struct hci_dev *hdev) } } - if (hdev->le_pkts) - hdev->le_cnt = cnt; - else - hdev->acl_cnt = cnt; - - if (cnt != tmp) + if (*cnt != tmp) hci_prio_recalculate(hdev, LE_LINK); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d0c118c47f6c..1c82dcdf6e8f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5920,7 +5920,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, * while we have an existing one in peripheral role. */ if (hdev->conn_hash.le_num_peripheral > 0 && - (!test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) || + (test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) || !(hdev->le_states[3] & 0x10))) return NULL; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 40d4887c7f79..25979f4283a6 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3456,6 +3456,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, * will be kept and this function does nothing. */ p = hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type); + if (!p) { + err = -EIO; + goto unlock; + } if (p->auto_connect == HCI_AUTO_CONN_EXPLICIT) p->auto_connect = HCI_AUTO_CONN_DISABLED; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 1e7ea3a4b7ef..4f9fdf400584 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -914,7 +914,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, * Confirms and the responder Enters the passkey. */ if (smp->method == OVERLAP) { - if (hcon->role == HCI_ROLE_MASTER) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) smp->method = CFM_PASSKEY; else smp->method = REQ_PASSKEY; @@ -964,7 +964,7 @@ static u8 smp_confirm(struct smp_chan *smp) smp_send_cmd(smp->conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp); - if (conn->hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); else SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); @@ -980,7 +980,8 @@ static u8 smp_random(struct smp_chan *smp) int ret; bt_dev_dbg(conn->hcon->hdev, "conn %p %s", conn, - conn->hcon->out ? "initiator" : "responder"); + test_bit(SMP_FLAG_INITIATOR, &smp->flags) ? "initiator" : + "responder"); ret = smp_c1(smp->tk, smp->rrnd, smp->preq, smp->prsp, hcon->init_addr_type, &hcon->init_addr, @@ -994,7 +995,7 @@ static u8 smp_random(struct smp_chan *smp) return SMP_CONFIRM_FAILED; } - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { u8 stk[16]; __le64 rand = 0; __le16 ediv = 0; @@ -1256,14 +1257,15 @@ static void smp_distribute_keys(struct smp_chan *smp) rsp = (void *) &smp->prsp[1]; /* The responder sends its keys first */ - if (hcon->out && (smp->remote_key_dist & KEY_DIST_MASK)) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags) && + (smp->remote_key_dist & KEY_DIST_MASK)) { smp_allow_key_dist(smp); return; } req = (void *) &smp->preq[1]; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { keydist = &rsp->init_key_dist; *keydist &= req->init_key_dist; } else { @@ -1432,7 +1434,7 @@ static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16]) struct hci_conn *hcon = smp->conn->hcon; u8 *na, *nb, a[7], b[7]; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { na = smp->prnd; nb = smp->rrnd; } else { @@ -1460,7 +1462,7 @@ static void sc_dhkey_check(struct smp_chan *smp) a[6] = hcon->init_addr_type; b[6] = hcon->resp_addr_type; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { local_addr = a; remote_addr = b; memcpy(io_cap, &smp->preq[1], 3); @@ -1539,7 +1541,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) /* The round is only complete when the initiator * receives pairing random. */ - if (!hcon->out) { + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); if (smp->passkey_round == 20) @@ -1567,7 +1569,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); return 0; @@ -1578,7 +1580,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) case SMP_CMD_PUBLIC_KEY: default: /* Initiating device starts the round */ - if (!hcon->out) + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return 0; bt_dev_dbg(hdev, "Starting passkey round %u", @@ -1623,7 +1625,7 @@ static int sc_user_reply(struct smp_chan *smp, u16 mgmt_op, __le32 passkey) } /* Initiator sends DHKey check first */ - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { sc_dhkey_check(smp); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); } else if (test_and_clear_bit(SMP_FLAG_DHKEY_PENDING, &smp->flags)) { @@ -1746,7 +1748,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) struct smp_cmd_pairing rsp, *req = (void *) skb->data; struct l2cap_chan *chan = conn->smp; struct hci_dev *hdev = conn->hcon->hdev; - struct smp_chan *smp; + struct smp_chan *smp = chan->data; u8 key_size, auth, sec_level; int ret; @@ -1755,16 +1757,14 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (skb->len < sizeof(*req)) return SMP_INVALID_PARAMS; - if (conn->hcon->role != HCI_ROLE_SLAVE) + if (smp && test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return SMP_CMD_NOTSUPP; - if (!chan->data) + if (!smp) { smp = smp_chan_create(conn); - else - smp = chan->data; - - if (!smp) - return SMP_UNSPECIFIED; + if (!smp) + return SMP_UNSPECIFIED; + } /* We didn't start the pairing, so match remote */ auth = req->auth_req & AUTH_REQ_MASK(hdev); @@ -1946,7 +1946,7 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) if (skb->len < sizeof(*rsp)) return SMP_INVALID_PARAMS; - if (conn->hcon->role != HCI_ROLE_MASTER) + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return SMP_CMD_NOTSUPP; skb_pull(skb, sizeof(*rsp)); @@ -2041,7 +2041,7 @@ static u8 sc_check_confirm(struct smp_chan *smp) if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) return sc_passkey_round(smp, SMP_CMD_PAIRING_CONFIRM); - if (conn->hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); @@ -2063,7 +2063,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp) u8 auth; /* The issue is only observed when we're in responder role */ - if (hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return SMP_UNSPECIFIED; if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { @@ -2099,7 +2099,8 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) struct hci_dev *hdev = hcon->hdev; bt_dev_dbg(hdev, "conn %p %s", conn, - hcon->out ? "initiator" : "responder"); + test_bit(SMP_FLAG_INITIATOR, &smp->flags) ? "initiator" : + "responder"); if (skb->len < sizeof(smp->pcnf)) return SMP_INVALID_PARAMS; @@ -2121,7 +2122,7 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) return ret; } - if (conn->hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); @@ -2156,7 +2157,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) if (!test_bit(SMP_FLAG_SC, &smp->flags)) return smp_random(smp); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { pkax = smp->local_pk; pkbx = smp->remote_pk; na = smp->prnd; @@ -2169,7 +2170,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) } if (smp->method == REQ_OOB) { - if (!hcon->out) + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); @@ -2180,7 +2181,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) return sc_passkey_round(smp, SMP_CMD_PAIRING_RANDOM); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { u8 cfm[16]; err = smp_f4(smp->tfm_cmac, smp->remote_pk, smp->local_pk, @@ -2221,7 +2222,7 @@ mackey_and_ltk: return SMP_UNSPECIFIED; if (smp->method == REQ_OOB) { - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { sc_dhkey_check(smp); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); } @@ -2295,10 +2296,27 @@ bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, return false; } +static void smp_send_pairing_req(struct smp_chan *smp, __u8 auth) +{ + struct smp_cmd_pairing cp; + + if (smp->conn->hcon->type == ACL_LINK) + build_bredr_pairing_cmd(smp, &cp, NULL); + else + build_pairing_cmd(smp->conn, &cp, NULL, auth); + + smp->preq[0] = SMP_CMD_PAIRING_REQ; + memcpy(&smp->preq[1], &cp, sizeof(cp)); + + smp_send_cmd(smp->conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); + + set_bit(SMP_FLAG_INITIATOR, &smp->flags); +} + static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_security_req *rp = (void *) skb->data; - struct smp_cmd_pairing cp; struct hci_conn *hcon = conn->hcon; struct hci_dev *hdev = hcon->hdev; struct smp_chan *smp; @@ -2347,16 +2365,20 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) skb_pull(skb, sizeof(*rp)); - memset(&cp, 0, sizeof(cp)); - build_pairing_cmd(conn, &cp, NULL, auth); + smp_send_pairing_req(smp, auth); - smp->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&smp->preq[1], &cp, sizeof(cp)); + return 0; +} - smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); +static void smp_send_security_req(struct smp_chan *smp, __u8 auth) +{ + struct smp_cmd_security_req cp; - return 0; + cp.auth_req = auth; + smp_send_cmd(smp->conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp); + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_REQ); + + clear_bit(SMP_FLAG_INITIATOR, &smp->flags); } int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) @@ -2427,23 +2449,11 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) authreq |= SMP_AUTH_MITM; } - if (hcon->role == HCI_ROLE_MASTER) { - struct smp_cmd_pairing cp; - - build_pairing_cmd(conn, &cp, NULL, authreq); - smp->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&smp->preq[1], &cp, sizeof(cp)); - - smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); - } else { - struct smp_cmd_security_req cp; - cp.auth_req = authreq; - smp_send_cmd(conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_REQ); - } + if (hcon->role == HCI_ROLE_MASTER) + smp_send_pairing_req(smp, authreq); + else + smp_send_security_req(smp, authreq); - set_bit(SMP_FLAG_INITIATOR, &smp->flags); ret = 0; unlock: @@ -2694,8 +2704,6 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb) static u8 sc_select_method(struct smp_chan *smp) { - struct l2cap_conn *conn = smp->conn; - struct hci_conn *hcon = conn->hcon; struct smp_cmd_pairing *local, *remote; u8 local_mitm, remote_mitm, local_io, remote_io, method; @@ -2708,7 +2716,7 @@ static u8 sc_select_method(struct smp_chan *smp) * the "struct smp_cmd_pairing" from them we need to skip the * first byte which contains the opcode. */ - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { local = (void *) &smp->preq[1]; remote = (void *) &smp->prsp[1]; } else { @@ -2777,7 +2785,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) /* Non-initiating device sends its public key after receiving * the key from the initiating device. */ - if (!hcon->out) { + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { err = sc_send_public_key(smp); if (err) return err; @@ -2839,7 +2847,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) } if (smp->method == REQ_OOB) { - if (hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); @@ -2848,7 +2856,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) return 0; } - if (hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); if (smp->method == REQ_PASSKEY) { @@ -2863,7 +2871,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) /* The Initiating device waits for the non-initiating device to * send the confirm value. */ - if (conn->hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return 0; err = smp_f4(smp->tfm_cmac, smp->local_pk, smp->remote_pk, smp->prnd, @@ -2897,7 +2905,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) a[6] = hcon->init_addr_type; b[6] = hcon->resp_addr_type; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { local_addr = a; remote_addr = b; memcpy(io_cap, &smp->prsp[1], 3); @@ -2922,7 +2930,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) if (crypto_memneq(check->e, e, 16)) return SMP_DHKEY_CHECK_FAILED; - if (!hcon->out) { + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { if (test_bit(SMP_FLAG_WAIT_USER, &smp->flags)) { set_bit(SMP_FLAG_DHKEY_PENDING, &smp->flags); return 0; @@ -2934,7 +2942,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) sc_add_ltk(smp); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { hci_le_start_enc(hcon, 0, 0, smp->tk, smp->enc_key_size); hcon->enc_key_size = smp->enc_key_size; } @@ -3083,7 +3091,6 @@ static void bredr_pairing(struct l2cap_chan *chan) struct l2cap_conn *conn = chan->conn; struct hci_conn *hcon = conn->hcon; struct hci_dev *hdev = hcon->hdev; - struct smp_cmd_pairing req; struct smp_chan *smp; bt_dev_dbg(hdev, "chan %p", chan); @@ -3135,14 +3142,7 @@ static void bredr_pairing(struct l2cap_chan *chan) bt_dev_dbg(hdev, "starting SMP over BR/EDR"); - /* Prepare and send the BR/EDR SMP Pairing Request */ - build_bredr_pairing_cmd(smp, &req, NULL); - - smp->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&smp->preq[1], &req, sizeof(req)); - - smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(req), &req); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); + smp_send_pairing_req(smp, 0x00); } static void smp_resume_cb(struct l2cap_chan *chan) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a58ea724790c..cff1f89719b6 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -228,7 +228,6 @@ void netpoll_poll_disable(struct net_device *dev) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); } -EXPORT_SYMBOL(netpoll_poll_disable); void netpoll_poll_enable(struct net_device *dev) { @@ -239,7 +238,6 @@ void netpoll_poll_enable(struct net_device *dev) up(&ni->dev_lock); rcu_read_unlock(); } -EXPORT_SYMBOL(netpoll_poll_enable); static void refill_skbs(void) { diff --git a/net/dsa/tag.c b/net/dsa/tag.c index 6e402d49afd3..79ad105902d9 100644 --- a/net/dsa/tag.c +++ b/net/dsa/tag.c @@ -105,8 +105,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, p = netdev_priv(skb->dev); - if (unlikely(cpu_dp->ds->untag_bridge_pvid)) { - nskb = dsa_untag_bridge_pvid(skb); + if (unlikely(cpu_dp->ds->untag_bridge_pvid || + cpu_dp->ds->untag_vlan_aware_bridge_pvid)) { + nskb = dsa_software_vlan_untag(skb); if (!nskb) { kfree_skb(skb); return 0; diff --git a/net/dsa/tag.h b/net/dsa/tag.h index f6b9c73718df..d5707870906b 100644 --- a/net/dsa/tag.h +++ b/net/dsa/tag.h @@ -44,46 +44,81 @@ static inline struct net_device *dsa_conduit_find_user(struct net_device *dev, return NULL; } -/* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged - * frames as untagged, since the bridge will not untag them. +/** + * dsa_software_untag_vlan_aware_bridge: Software untagging for VLAN-aware bridge + * @skb: Pointer to received socket buffer (packet) + * @br: Pointer to bridge upper interface of ingress port + * @vid: Parsed VID from packet + * + * The bridge can process tagged packets. Software like STP/PTP may not. The + * bridge can also process untagged packets, to the same effect as if they were + * tagged with the PVID of the ingress port. So packets tagged with the PVID of + * the bridge port must be software-untagged, to support both use cases. */ -static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) +static inline void dsa_software_untag_vlan_aware_bridge(struct sk_buff *skb, + struct net_device *br, + u16 vid) { - struct dsa_port *dp = dsa_user_to_port(skb->dev); - struct net_device *br = dsa_port_bridge_dev_get(dp); - struct net_device *dev = skb->dev; - struct net_device *upper_dev; - u16 vid, pvid, proto; + u16 pvid, proto; int err; - if (!br || br_vlan_enabled(br)) - return skb; - err = br_vlan_get_proto(br, &proto); if (err) - return skb; + return; - /* Move VLAN tag from data to hwaccel */ - if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) { - skb = skb_vlan_untag(skb); - if (!skb) - return NULL; - } + err = br_vlan_get_pvid_rcu(skb->dev, &pvid); + if (err) + return; - if (!skb_vlan_tag_present(skb)) - return skb; + if (vid == pvid && skb->vlan_proto == htons(proto)) + __vlan_hwaccel_clear_tag(skb); +} - vid = skb_vlan_tag_get_id(skb); +/** + * dsa_software_untag_vlan_unaware_bridge: Software untagging for VLAN-unaware bridge + * @skb: Pointer to received socket buffer (packet) + * @br: Pointer to bridge upper interface of ingress port + * @vid: Parsed VID from packet + * + * The bridge ignores all VLAN tags. Software like STP/PTP may not (it may run + * on the plain port, or on a VLAN upper interface). Maybe packets are coming + * to software as tagged with a driver-defined VID which is NOT equal to the + * PVID of the bridge port (since the bridge is VLAN-unaware, its configuration + * should NOT be committed to hardware). DSA needs a method for this private + * VID to be communicated by software to it, and if packets are tagged with it, + * software-untag them. Note: the private VID may be different per bridge, to + * support the FDB isolation use case. + * + * FIXME: this is currently implemented based on the broken assumption that + * the "private VID" used by the driver in VLAN-unaware mode is equal to the + * bridge PVID. It should not be, except for a coincidence; the bridge PVID is + * irrelevant to the data path in the VLAN-unaware mode. Thus, the VID that + * this function removes is wrong. + * + * All users of ds->untag_bridge_pvid should fix their drivers, if necessary, + * to make the two independent. Only then, if there still remains a need to + * strip the private VID from packets, then a new ds->ops->get_private_vid() + * API shall be introduced to communicate to DSA what this VID is, which needs + * to be stripped here. + */ +static inline void dsa_software_untag_vlan_unaware_bridge(struct sk_buff *skb, + struct net_device *br, + u16 vid) +{ + struct net_device *upper_dev; + u16 pvid, proto; + int err; - /* We already run under an RCU read-side critical section since - * we are called from netif_receive_skb_list_internal(). - */ - err = br_vlan_get_pvid_rcu(dev, &pvid); + err = br_vlan_get_proto(br, &proto); if (err) - return skb; + return; - if (vid != pvid) - return skb; + err = br_vlan_get_pvid_rcu(skb->dev, &pvid); + if (err) + return; + + if (vid != pvid || skb->vlan_proto != htons(proto)) + return; /* The sad part about attempting to untag from DSA is that we * don't know, unless we check, if the skb will end up in @@ -95,10 +130,50 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) * definitely keep the tag, to make sure it keeps working. */ upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid); - if (upper_dev) + if (!upper_dev) + __vlan_hwaccel_clear_tag(skb); +} + +/** + * dsa_software_vlan_untag: Software VLAN untagging in DSA receive path + * @skb: Pointer to socket buffer (packet) + * + * Receive path method for switches which cannot avoid tagging all packets + * towards the CPU port. Called when ds->untag_bridge_pvid (legacy) or + * ds->untag_vlan_aware_bridge_pvid is set to true. + * + * As a side effect of this method, any VLAN tag from the skb head is moved + * to hwaccel. + */ +static inline struct sk_buff *dsa_software_vlan_untag(struct sk_buff *skb) +{ + struct dsa_port *dp = dsa_user_to_port(skb->dev); + struct net_device *br = dsa_port_bridge_dev_get(dp); + u16 vid; + + /* software untagging for standalone ports not yet necessary */ + if (!br) return skb; - __vlan_hwaccel_clear_tag(skb); + /* Move VLAN tag from data to hwaccel */ + if (!skb_vlan_tag_present(skb)) { + skb = skb_vlan_untag(skb); + if (!skb) + return NULL; + } + + if (!skb_vlan_tag_present(skb)) + return skb; + + vid = skb_vlan_tag_get_id(skb); + + if (br_vlan_enabled(br)) { + if (dp->ds->untag_vlan_aware_bridge_pvid) + dsa_software_untag_vlan_aware_bridge(skb, br, vid); + } else { + if (dp->ds->untag_bridge_pvid) + dsa_software_untag_vlan_unaware_bridge(skb, br, vid); + } return skb; } diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index e0e4300bfbd3..bf6608fc6be7 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -8,40 +8,6 @@ #define OCELOT_NAME "ocelot" #define SEVILLE_NAME "seville" -/* If the port is under a VLAN-aware bridge, remove the VLAN header from the - * payload and move it into the DSA tag, which will make the switch classify - * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, - * which is the pvid of standalone and VLAN-unaware bridge ports. - */ -static void ocelot_xmit_get_vlan_info(struct sk_buff *skb, struct dsa_port *dp, - u64 *vlan_tci, u64 *tag_type) -{ - struct net_device *br = dsa_port_bridge_dev_get(dp); - struct vlan_ethhdr *hdr; - u16 proto, tci; - - if (!br || !br_vlan_enabled(br)) { - *vlan_tci = 0; - *tag_type = IFH_TAG_TYPE_C; - return; - } - - hdr = skb_vlan_eth_hdr(skb); - br_vlan_get_proto(br, &proto); - - if (ntohs(hdr->h_vlan_proto) == proto) { - vlan_remove_tag(skb, &tci); - *vlan_tci = tci; - } else { - rcu_read_lock(); - br_vlan_get_pvid_rcu(br, &tci); - rcu_read_unlock(); - *vlan_tci = tci; - } - - *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; -} - static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, __be32 ifh_prefix, void **ifh) { @@ -53,7 +19,8 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, u32 rew_op = 0; u64 qos_class; - ocelot_xmit_get_vlan_info(skb, dp, &vlan_tci, &tag_type); + ocelot_xmit_get_vlan_info(skb, dsa_port_bridge_dev_get(dp), &vlan_tci, + &tag_type); qos_class = netdev_get_num_tc(netdev) ? netdev_get_prio_tc_map(netdev, skb->priority) : skb->priority; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fd17f25ff288..a4e510846905 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -97,6 +97,8 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; +static DEFINE_MUTEX(tcp_exit_batch_mutex); + static u32 tcp_v4_init_seq(const struct sk_buff *skb) { return secure_tcp_seq(ip_hdr(skb)->daddr, @@ -3514,6 +3516,16 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { struct net *net; + /* make sure concurrent calls to tcp_sk_exit_batch from net_cleanup_work + * and failed setup_net error unwinding path are serialized. + * + * tcp_twsk_purge() handles twsk in any dead netns, not just those in + * net_exit_list, the thread that dismantles a particular twsk must + * do so without other thread progressing to refcount_dec_and_test() of + * tcp_death_row.tw_refcount. + */ + mutex_lock(&tcp_exit_batch_mutex); + tcp_twsk_purge(net_exit_list); list_for_each_entry(net, net_exit_list, exit_list) { @@ -3521,6 +3533,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); tcp_fastopen_ctx_destroy(net); } + + mutex_unlock(&tcp_exit_batch_mutex); } static struct pernet_operations __net_initdata tcp_sk_ops = { diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b254a5dadfcf..d842303587af 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -279,7 +279,8 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, return ERR_PTR(-EINVAL); if (unlikely(skb_checksum_start(gso_skb) != - skb_transport_header(gso_skb))) + skb_transport_header(gso_skb) && + !(skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST))) return ERR_PTR(-EINVAL); /* We don't know if egress device can segment and checksum the packet diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ab504d31f0cd..f26841f1490f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -70,11 +70,15 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * /* Be paranoid, rather than too clever. */ if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { + /* Make sure idev stays alive */ + rcu_read_lock(); skb = skb_expand_head(skb, hh_len); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); return -ENOMEM; } + rcu_read_unlock(); } hdr = ipv6_hdr(skb); @@ -283,11 +287,15 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, head_room += opt->opt_nflen + opt->opt_flen; if (unlikely(head_room > skb_headroom(skb))) { + /* Make sure idev stays alive */ + rcu_read_lock(); skb = skb_expand_head(skb, head_room); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); return -ENOBUFS; } + rcu_read_unlock(); } if (opt) { @@ -1956,6 +1964,7 @@ int ip6_send_skb(struct sk_buff *skb) struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); int err; + rcu_read_lock(); err = ip6_local_out(net, skb->sk, skb); if (err) { if (err > 0) @@ -1965,6 +1974,7 @@ int ip6_send_skb(struct sk_buff *skb) IPSTATS_MIB_OUTDISCARDS); } + rcu_read_unlock(); return err; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9dee0c127955..87dfb565a9f8 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1507,7 +1507,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) tdev = __dev_get_by_index(t->net, p->link); if (tdev) { - dev->hard_header_len = tdev->hard_header_len + t_hlen; + dev->needed_headroom = tdev->hard_header_len + + tdev->needed_headroom + t_hlen; mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU); mtu = mtu - t_hlen; @@ -1731,7 +1732,9 @@ ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr, int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { struct ip6_tnl *tnl = netdev_priv(dev); + int t_hlen; + t_hlen = tnl->hlen + sizeof(struct ipv6hdr); if (tnl->parms.proto == IPPROTO_IPV6) { if (new_mtu < IPV6_MIN_MTU) return -EINVAL; @@ -1740,10 +1743,10 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) { - if (new_mtu > IP6_MAX_MTU - dev->hard_header_len) + if (new_mtu > IP6_MAX_MTU - dev->hard_header_len - t_hlen) return -EINVAL; } else { - if (new_mtu > IP_MAX_MTU - dev->hard_header_len) + if (new_mtu > IP_MAX_MTU - dev->hard_header_len - t_hlen) return -EINVAL; } WRITE_ONCE(dev->mtu, new_mtu); @@ -1887,12 +1890,11 @@ ip6_tnl_dev_init_gen(struct net_device *dev) t_hlen = t->hlen + sizeof(struct ipv6hdr); dev->type = ARPHRD_TUNNEL6; - dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; dev->min_mtu = ETH_MIN_MTU; - dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; + dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len - t_hlen; netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); netdev_lockdep_set_classes(dev); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 1e42e13ad24e..d3e9efab7f4b 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -86,13 +86,15 @@ struct device *iucv_alloc_device(const struct attribute_group **attrs, { struct device *dev; va_list vargs; + char buf[20]; int rc; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) goto out_error; va_start(vargs, fmt); - rc = dev_set_name(dev, fmt, vargs); + vsnprintf(buf, sizeof(buf), fmt, vargs); + rc = dev_set_name(dev, "%s", buf); va_end(vargs); if (rc) goto out_error; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 2f191e50d4fc..d4118c796290 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -755,6 +755,7 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR); int err = -EPIPE; + mutex_lock(&kcm->tx_mutex); lock_sock(sk); /* Per tcp_sendmsg this should be in poll */ @@ -926,6 +927,7 @@ partial_message: KCM_STATS_ADD(kcm->stats.tx_bytes, copied); release_sock(sk); + mutex_unlock(&kcm->tx_mutex); return copied; out_error: @@ -951,6 +953,7 @@ out_error: sk->sk_write_space(sk); release_sock(sk); + mutex_unlock(&kcm->tx_mutex); return err; } @@ -1204,6 +1207,7 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) spin_unlock_bh(&mux->lock); INIT_WORK(&kcm->tx_work, kcm_tx_work); + mutex_init(&kcm->tx_mutex); spin_lock_bh(&mux->rx_lock); kcm_rcv_ready(kcm); diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 77e5dd422258..8551dab1d1e6 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -366,7 +366,7 @@ static void mctp_test_route_input_sk(struct kunit *test) skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); - KUNIT_EXPECT_EQ(test, skb->len, 1); + KUNIT_EXPECT_EQ(test, skb2->len, 1); skb_free_datagram(sock->sk, skb2); diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 23bb89c94e90..3e6e0f5510bb 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -60,16 +60,6 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ return 0; } -int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) -{ - pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); - - spin_lock_bh(&msk->pm.lock); - mptcp_pm_nl_rm_subflow_received(msk, rm_list); - spin_unlock_bh(&msk->pm.lock); - return 0; -} - /* path manager event handlers */ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side) @@ -444,9 +434,6 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id *flags = 0; *ifindex = 0; - if (!id) - return 0; - if (mptcp_pm_is_userspace(msk)) return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 4cae2aa7be5c..3e4ad801786f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -143,11 +143,13 @@ static bool lookup_subflow_by_daddr(const struct list_head *list, return false; } -static struct mptcp_pm_addr_entry * +static bool select_local_address(const struct pm_nl_pernet *pernet, - const struct mptcp_sock *msk) + const struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *new_entry) { - struct mptcp_pm_addr_entry *entry, *ret = NULL; + struct mptcp_pm_addr_entry *entry; + bool found = false; msk_owned_by_me(msk); @@ -159,17 +161,21 @@ select_local_address(const struct pm_nl_pernet *pernet, if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) continue; - ret = entry; + *new_entry = *entry; + found = true; break; } rcu_read_unlock(); - return ret; + + return found; } -static struct mptcp_pm_addr_entry * -select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk) +static bool +select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *new_entry) { - struct mptcp_pm_addr_entry *entry, *ret = NULL; + struct mptcp_pm_addr_entry *entry; + bool found = false; rcu_read_lock(); /* do not keep any additional per socket state, just signal @@ -184,11 +190,13 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk) if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) continue; - ret = entry; + *new_entry = *entry; + found = true; break; } rcu_read_unlock(); - return ret; + + return found; } unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) @@ -512,9 +520,10 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { - struct mptcp_pm_addr_entry *local, *signal_and_subflow = NULL; struct sock *sk = (struct sock *)msk; + struct mptcp_pm_addr_entry local; unsigned int add_addr_signal_max; + bool signal_and_subflow = false; unsigned int local_addr_max; struct pm_nl_pernet *pernet; unsigned int subflows_max; @@ -565,23 +574,22 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) return; - local = select_signal_address(pernet, msk); - if (!local) + if (!select_signal_address(pernet, msk, &local)) goto subflow; /* If the alloc fails, we are on memory pressure, not worth * continuing, and trying to create subflows. */ - if (!mptcp_pm_alloc_anno_list(msk, &local->addr)) + if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) return; - __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); + __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; - mptcp_pm_announce_addr(msk, &local->addr, false); + mptcp_pm_announce_addr(msk, &local.addr, false); mptcp_pm_nl_addr_send_ack(msk); - if (local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) - signal_and_subflow = local; + if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) + signal_and_subflow = true; } subflow: @@ -592,26 +600,22 @@ subflow: bool fullmesh; int i, nr; - if (signal_and_subflow) { - local = signal_and_subflow; - signal_and_subflow = NULL; - } else { - local = select_local_address(pernet, msk); - if (!local) - break; - } + if (signal_and_subflow) + signal_and_subflow = false; + else if (!select_local_address(pernet, msk, &local)) + break; - fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); + fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); msk->pm.local_addr_used++; - __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); - nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs); + __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); if (nr == 0) continue; spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) - __mptcp_subflow_connect(sk, &local->addr, &addrs[i]); + __mptcp_subflow_connect(sk, &local.addr, &addrs[i]); spin_lock_bh(&msk->pm.lock); } mptcp_pm_nl_check_work_pending(msk); @@ -636,6 +640,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, { struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *entry; + struct mptcp_addr_info mpc_addr; struct pm_nl_pernet *pernet; unsigned int subflows_max; int i = 0; @@ -643,6 +648,8 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, pernet = pm_nl_get_pernet_from_msk(msk); subflows_max = mptcp_pm_get_subflows_max(msk); + mptcp_local_address((struct sock_common *)msk, &mpc_addr); + rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) @@ -653,7 +660,13 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, if (msk->pm.subflows < subflows_max) { msk->pm.subflows++; - addrs[i++] = entry->addr; + addrs[i] = entry->addr; + + /* Special case for ID0: set the correct ID */ + if (mptcp_addresses_equal(&entry->addr, &mpc_addr, entry->addr.port)) + addrs[i].id = 0; + + i++; } } rcu_read_unlock(); @@ -829,25 +842,27 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_close_ssk(sk, ssk, subflow); spin_lock_bh(&msk->pm.lock); - removed = true; + removed |= subflow->request_join; if (rm_type == MPTCP_MIB_RMSUBFLOW) __MPTCP_INC_STATS(sock_net(sk), rm_type); } - if (rm_type == MPTCP_MIB_RMSUBFLOW) - __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap); - else if (rm_type == MPTCP_MIB_RMADDR) + + if (rm_type == MPTCP_MIB_RMADDR) __MPTCP_INC_STATS(sock_net(sk), rm_type); + if (!removed) continue; if (!mptcp_pm_is_kernel(msk)) continue; - if (rm_type == MPTCP_MIB_RMADDR) { - msk->pm.add_addr_accepted--; - WRITE_ONCE(msk->pm.accept_addr, true); - } else if (rm_type == MPTCP_MIB_RMSUBFLOW) { - msk->pm.local_addr_used--; + if (rm_type == MPTCP_MIB_RMADDR && rm_id && + !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { + /* Note: if the subflow has been closed before, this + * add_addr_accepted counter will not be decremented. + */ + if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) + WRITE_ONCE(msk->pm.accept_addr, true); } } } @@ -857,8 +872,8 @@ static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); } -void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list) +static void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list) { mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); } @@ -1393,6 +1408,10 @@ int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int struct sock *sk = (struct sock *)msk; struct net *net = sock_net(sk); + /* No entries with ID 0 */ + if (id == 0) + return 0; + rcu_read_lock(); entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); if (entry) { @@ -1431,13 +1450,24 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, ret = remove_anno_list_by_saddr(msk, addr); if (ret || force) { spin_lock_bh(&msk->pm.lock); - msk->pm.add_addr_signaled -= ret; + if (ret) { + __set_bit(addr->id, msk->pm.id_avail_bitmap); + msk->pm.add_addr_signaled--; + } mptcp_pm_remove_addr(msk, &list); spin_unlock_bh(&msk->pm.lock); } return ret; } +static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) +{ + /* If it was marked as used, and not ID 0, decrement local_addr_used */ + if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && + id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) + msk->pm.local_addr_used--; +} + static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, const struct mptcp_pm_addr_entry *entry) { @@ -1466,8 +1496,19 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr); mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); - if (remove_subflow) - mptcp_pm_remove_subflow(msk, &list); + + if (remove_subflow) { + spin_lock_bh(&msk->pm.lock); + mptcp_pm_nl_rm_subflow_received(msk, &list); + spin_unlock_bh(&msk->pm.lock); + } + + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + spin_lock_bh(&msk->pm.lock); + __mark_subflow_endp_available(msk, list.ids[0]); + spin_unlock_bh(&msk->pm.lock); + } + release_sock(sk); next: @@ -1502,6 +1543,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net, spin_lock_bh(&msk->pm.lock); mptcp_pm_remove_addr(msk, &list); mptcp_pm_nl_rm_subflow_received(msk, &list); + __mark_subflow_endp_available(msk, 0); spin_unlock_bh(&msk->pm.lock); release_sock(sk); @@ -1605,14 +1647,17 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, alist.ids[alist.nr++] = entry->addr.id; } + spin_lock_bh(&msk->pm.lock); if (alist.nr) { - spin_lock_bh(&msk->pm.lock); msk->pm.add_addr_signaled -= alist.nr; mptcp_pm_remove_addr(msk, &alist); - spin_unlock_bh(&msk->pm.lock); } if (slist.nr) - mptcp_pm_remove_subflow(msk, &slist); + mptcp_pm_nl_rm_subflow_received(msk, &slist); + /* Reset counters: maybe some subflows have been removed before */ + bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + msk->pm.local_addr_used = 0; + spin_unlock_bh(&msk->pm.lock); } static void mptcp_nl_remove_addrs_list(struct net *net, @@ -1900,6 +1945,7 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); mptcp_pm_nl_rm_subflow_received(msk, &list); + __mark_subflow_endp_available(msk, list.ids[0]); mptcp_pm_create_subflow_or_signal_addr(msk); spin_unlock_bh(&msk->pm.lock); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 08387140a646..71898519722f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1026,7 +1026,6 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); -int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); void mptcp_free_local_addr_list(struct mptcp_sock *msk); @@ -1133,8 +1132,6 @@ static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflo void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); -void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list); unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 88787b45e30d..8b541a080342 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -17,6 +17,9 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, switch (skb->protocol) { case htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) + return NF_ACCEPT; + veth = (struct vlan_ethhdr *)skb_mac_header(skb); proto = veth->h_vlan_encapsulated_proto; break; diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index c2c005234dcd..98edcaa37b38 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -281,6 +281,9 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, switch (skb->protocol) { case htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) + return false; + veth = (struct vlan_ethhdr *)skb_mac_header(skb); if (veth->h_vlan_encapsulated_proto == proto) { *offset += VLAN_HLEN; diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 291ed2026367..eab0dc66bee6 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -107,11 +107,16 @@ static void nft_counter_reset(struct nft_counter_percpu_priv *priv, struct nft_counter *total) { struct nft_counter *this_cpu; + seqcount_t *myseq; local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); + myseq = this_cpu_ptr(&nft_counter_seq); + + write_seqcount_begin(myseq); this_cpu->packets -= total->packets; this_cpu->bytes -= total->bytes; + write_seqcount_end(myseq); local_bh_enable(); } @@ -265,7 +270,7 @@ static void nft_counter_offload_stats(struct nft_expr *expr, struct nft_counter *this_cpu; seqcount_t *myseq; - preempt_disable(); + local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); myseq = this_cpu_ptr(&nft_counter_seq); @@ -273,7 +278,7 @@ static void nft_counter_offload_stats(struct nft_expr *expr, this_cpu->packets += stats->pkts; this_cpu->bytes += stats->bytes; write_seqcount_end(myseq); - preempt_enable(); + local_bh_enable(); } void nft_counter_init_seqcount(void) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 99d72543abd3..78d9961fcd44 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2706,7 +2706,7 @@ static struct pernet_operations ovs_net_ops = { }; static const char * const ovs_drop_reasons[] = { -#define S(x) (#x), +#define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x), OVS_DROP_REASONS(S) #undef S }; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index edc72962ae63..0f8d581438c3 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -446,12 +446,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct netem_sched_data *q = qdisc_priv(sch); /* We don't fill cb now as skb_unshare() may invalidate it */ struct netem_skb_cb *cb; - struct sk_buff *skb2; + struct sk_buff *skb2 = NULL; struct sk_buff *segs = NULL; unsigned int prev_len = qdisc_pkt_len(skb); int count = 1; - int rc = NET_XMIT_SUCCESS; - int rc_drop = NET_XMIT_DROP; /* Do not fool qdisc_drop_all() */ skb->prev = NULL; @@ -480,19 +478,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, skb_orphan_partial(skb); /* - * If we need to duplicate packet, then re-insert at top of the - * qdisc tree, since parent queuer expects that only one - * skb will be queued. + * If we need to duplicate packet, then clone it before + * original is modified. */ - if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { - struct Qdisc *rootq = qdisc_root_bh(sch); - u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ - - q->duplicate = 0; - rootq->enqueue(skb2, rootq, to_free); - q->duplicate = dupsave; - rc_drop = NET_XMIT_SUCCESS; - } + if (count > 1) + skb2 = skb_clone(skb, GFP_ATOMIC); /* * Randomized packet corruption. @@ -504,7 +494,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (skb_is_gso(skb)) { skb = netem_segment(skb, sch, to_free); if (!skb) - return rc_drop; + goto finish_segs; + segs = skb->next; skb_mark_not_on_list(skb); qdisc_skb_cb(skb)->pkt_len = skb->len; @@ -530,7 +521,24 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* re-link segs, so that qdisc_drop_all() frees them all */ skb->next = segs; qdisc_drop_all(skb, sch, to_free); - return rc_drop; + if (skb2) + __qdisc_drop(skb2, to_free); + return NET_XMIT_DROP; + } + + /* + * If doing duplication then re-insert at top of the + * qdisc tree, since parent queuer expects that only one + * skb will be queued. + */ + if (skb2) { + struct Qdisc *rootq = qdisc_root_bh(sch); + u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ + + q->duplicate = 0; + rootq->enqueue(skb2, rootq, to_free); + q->duplicate = dupsave; + skb2 = NULL; } qdisc_qstats_backlog_inc(sch, skb); @@ -601,9 +609,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, } finish_segs: + if (skb2) + __qdisc_drop(skb2, to_free); + if (segs) { unsigned int len, last_len; - int nb; + int rc, nb; len = skb ? skb->len : 0; nb = skb ? 1 : 0; diff --git a/rust/Makefile b/rust/Makefile index 1f10f92737f2..8de3ebba9551 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -227,7 +227,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -fno-reorder-blocks -fno-allow-store-data-races -fasan-shadow-offset=% \ -fzero-call-used-regs=% -fno-stack-clash-protection \ -fno-inline-functions-called-once -fsanitize=bounds-strict \ - -fstrict-flex-arrays=% \ + -fstrict-flex-arrays=% -fmin-function-alignment=% \ --param=% --param asan-% # Derived from `scripts/Makefile.clang`. @@ -350,12 +350,12 @@ rust-analyzer: $(Q)$(srctree)/scripts/generate_rust_analyzer.py \ --cfgs='core=$(core-cfgs)' --cfgs='alloc=$(alloc-cfgs)' \ $(realpath $(srctree)) $(realpath $(objtree)) \ - $(RUST_LIB_SRC) $(KBUILD_EXTMOD) > \ + $(rustc_sysroot) $(RUST_LIB_SRC) $(KBUILD_EXTMOD) > \ $(if $(KBUILD_EXTMOD),$(extmod_prefix),$(objtree))/rust-project.json redirect-intrinsics = \ - __addsf3 __eqsf2 __gesf2 __lesf2 __ltsf2 __mulsf3 __nesf2 __unordsf2 \ - __adddf3 __ledf2 __ltdf2 __muldf3 __unorddf2 \ + __addsf3 __eqsf2 __extendsfdf2 __gesf2 __lesf2 __ltsf2 __mulsf3 __nesf2 __truncdfsf2 __unordsf2 \ + __adddf3 __eqdf2 __ledf2 __ltdf2 __muldf3 __unorddf2 \ __muloti4 __multi3 \ __udivmodti4 __udivti3 __umodti3 diff --git a/rust/compiler_builtins.rs b/rust/compiler_builtins.rs index bba2922c6ef7..f14b8d7caf89 100644 --- a/rust/compiler_builtins.rs +++ b/rust/compiler_builtins.rs @@ -40,16 +40,19 @@ macro_rules! define_panicking_intrinsics( define_panicking_intrinsics!("`f32` should not be used", { __addsf3, __eqsf2, + __extendsfdf2, __gesf2, __lesf2, __ltsf2, __mulsf3, __nesf2, + __truncdfsf2, __unordsf2, }); define_panicking_intrinsics!("`f64` should not be used", { __adddf3, + __eqdf2, __ledf2, __ltdf2, __muldf3, diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs index 159e75292970..5be0cb9db3ee 100644 --- a/rust/macros/lib.rs +++ b/rust/macros/lib.rs @@ -94,7 +94,7 @@ use proc_macro::TokenStream; /// - `license`: ASCII string literal of the license of the kernel module (required). /// - `alias`: array of ASCII string literals of the alias names of the kernel module. /// - `firmware`: array of ASCII string literals of the firmware files of -/// the kernel module. +/// the kernel module. #[proc_macro] pub fn module(ts: TokenStream) -> TokenStream { module::module(ts) diff --git a/samples/trace_events/trace_custom_sched.c b/samples/trace_events/trace_custom_sched.c index b99d9ab7db85..dd409b704b35 100644 --- a/samples/trace_events/trace_custom_sched.c +++ b/samples/trace_events/trace_custom_sched.c @@ -8,7 +8,6 @@ #define pr_fmt(fmt) fmt #include <linux/trace_events.h> -#include <linux/version.h> #include <linux/module.h> #include <linux/sched.h> diff --git a/scripts/Makefile.build b/scripts/Makefile.build index efacca63c897..a5ac8ed1936f 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -487,7 +487,7 @@ $(subdir-ym): need-modorder=$(if $(filter $@/modules.order, $(subdir-modorder)),1) \ $(filter $@/%, $(single-subdir-goals)) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index fe3668dc4954..207325eaf1d1 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -400,26 +400,23 @@ $(obj)/%.dtb.S: $(obj)/%.dtb FORCE $(obj)/%.dtbo.S: $(obj)/%.dtbo FORCE $(call if_changed,wrap_S_dtb) -quiet_cmd_dtc = DTC $@ +quiet_dtb_check_tag = $(if $(dtb-check-enabled),[C], ) +cmd_dtb_check = $(if $(dtb-check-enabled),; $(DT_CHECKER) $(DT_CHECKER_FLAGS) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) $@ || true) + +quiet_cmd_dtc = DTC $(quiet_dtb_check_tag) $@ cmd_dtc = $(HOSTCC) -E $(dtc_cpp_flags) -x assembler-with-cpp -o $(dtc-tmp) $< ; \ $(DTC) -o $@ -b 0 \ $(addprefix -i,$(dir $<) $(DTC_INCLUDE)) $(DTC_FLAGS) \ -d $(depfile).dtc.tmp $(dtc-tmp) ; \ - cat $(depfile).pre.tmp $(depfile).dtc.tmp > $(depfile) - -DT_CHECK_CMD = $(DT_CHECKER) $(DT_CHECKER_FLAGS) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) + cat $(depfile).pre.tmp $(depfile).dtc.tmp > $(depfile) \ + $(cmd_dtb_check) # NOTE: # Do not replace $(filter %.dtb %.dtbo, $^) with $(real-prereqs). When a single # DTB is turned into a multi-blob DTB, $^ will contain header file dependencies # recorded in the .*.cmd file. -ifneq ($(CHECK_DTBS),) -quiet_cmd_fdtoverlay = DTOVLCH $@ - cmd_fdtoverlay = $(objtree)/scripts/dtc/fdtoverlay -o $@ -i $(filter %.dtb %.dtbo, $^) ; $(DT_CHECK_CMD) $@ || true -else -quiet_cmd_fdtoverlay = DTOVL $@ - cmd_fdtoverlay = $(objtree)/scripts/dtc/fdtoverlay -o $@ -i $(filter %.dtb %.dtbo, $^) -endif +quiet_cmd_fdtoverlay = OVL $(quiet_dtb_check_tag) $@ + cmd_fdtoverlay = $(objtree)/scripts/dtc/fdtoverlay -o $@ -i $(filter %.dtb %.dtbo, $^) $(cmd_dtb_check) $(multi-dtb-y): FORCE $(call if_changed,fdtoverlay) @@ -430,16 +427,11 @@ DT_CHECKER ?= dt-validate DT_CHECKER_FLAGS ?= $(if $(DT_SCHEMA_FILES),-l $(DT_SCHEMA_FILES),-m) DT_BINDING_DIR := Documentation/devicetree/bindings DT_TMP_SCHEMA := $(objtree)/$(DT_BINDING_DIR)/processed-schema.json - -quiet_cmd_dtb = DTC_CHK $@ - cmd_dtb = $(cmd_dtc) ; $(DT_CHECK_CMD) $@ || true -else -quiet_cmd_dtb = $(quiet_cmd_dtc) - cmd_dtb = $(cmd_dtc) +dtb-check-enabled = $(if $(filter %.dtb, $@),y) endif $(obj)/%.dtb: $(obj)/%.dts $(DTC) $(DT_TMP_SCHEMA) FORCE - $(call if_changed_dep,dtb) + $(call if_changed_dep,dtc) $(obj)/%.dtbo: $(src)/%.dtso $(DTC) FORCE $(call if_changed_dep,dtc) diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 1fa98b5e952b..306a6bb86e4d 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -62,7 +62,7 @@ endif targets += $(modules:%.o=%.ko) $(modules:%.o=%.mod.o) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 49946cb96844..5ceecbed31eb 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -33,7 +33,7 @@ targets += vmlinux vmlinux: scripts/link-vmlinux.sh vmlinux.o $(KBUILD_LDS) FORCE +$(call if_changed_dep,link_vmlinux) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 6de297916ce6..d64070b6b4bc 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -87,7 +87,7 @@ targets += modules.builtin modules.builtin: modules.builtin.modinfo FORCE $(call if_changed,modules_builtin) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 746ff2d272f2..5694df3da2e9 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -19,10 +19,6 @@ #include "gcc-common.h" #include "randomize_layout_seed.h" -#if BUILDING_GCC_MAJOR < 4 || (BUILDING_GCC_MAJOR == 4 && BUILDING_GCC_MINOR < 7) -#error "The RANDSTRUCT plugin requires GCC 4.7 or newer." -#endif - #define ORIG_TYPE_NAME(node) \ (TYPE_NAME(TYPE_MAIN_VARIANT(node)) != NULL_TREE ? ((const unsigned char *)IDENTIFIER_POINTER(TYPE_NAME(TYPE_MAIN_VARIANT(node)))) : (const unsigned char *)"anonymous") diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index f270c7b0cf34..d2bc63cde8c6 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -145,6 +145,7 @@ def main(): parser.add_argument('--cfgs', action='append', default=[]) parser.add_argument("srctree", type=pathlib.Path) parser.add_argument("objtree", type=pathlib.Path) + parser.add_argument("sysroot", type=pathlib.Path) parser.add_argument("sysroot_src", type=pathlib.Path) parser.add_argument("exttree", type=pathlib.Path, nargs="?") args = parser.parse_args() @@ -154,9 +155,12 @@ def main(): level=logging.INFO if args.verbose else logging.WARNING ) + # Making sure that the `sysroot` and `sysroot_src` belong to the same toolchain. + assert args.sysroot in args.sysroot_src.parents + rust_project = { "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree, args.cfgs), - "sysroot_src": str(args.sysroot_src), + "sysroot": str(args.sysroot), } json.dump(rust_project, sys.stdout, sort_keys=True, indent=4) diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 87f34925eb7b..404edf7587e0 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -162,7 +162,7 @@ fn main() { "data-layout", "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", ); - let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string(); + let mut features = "-mmx,+soft-float".to_string(); if cfg.has("MITIGATION_RETPOLINE") { features += ",+retpoline-external-thunk"; } @@ -179,7 +179,7 @@ fn main() { "data-layout", "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128", ); - let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string(); + let mut features = "-mmx,+soft-float".to_string(); if cfg.has("MITIGATION_RETPOLINE") { features += ",+retpoline-external-thunk"; } diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 0ed873491bf5..123dab0572f8 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -5,8 +5,7 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. * - * Usage: kallsyms [--all-symbols] [--absolute-percpu] - * [--lto-clang] in.map > out.S + * Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S * * Table compression uses all the unused char codes on the symbols and * maps these to the most used substrings (tokens). For instance, it might @@ -62,7 +61,6 @@ static struct sym_entry **table; static unsigned int table_size, table_cnt; static int all_symbols; static int absolute_percpu; -static int lto_clang; static int token_profit[0x10000]; @@ -73,8 +71,7 @@ static unsigned char best_table_len[256]; static void usage(void) { - fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] " - "[--lto-clang] in.map > out.S\n"); + fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S\n"); exit(1); } @@ -344,25 +341,6 @@ static bool symbol_absolute(const struct sym_entry *s) return s->percpu_absolute; } -static void cleanup_symbol_name(char *s) -{ - char *p; - - /* - * ASCII[.] = 2e - * ASCII[0-9] = 30,39 - * ASCII[A-Z] = 41,5a - * ASCII[_] = 5f - * ASCII[a-z] = 61,7a - * - * As above, replacing the first '.' in ".llvm." with '\0' does not - * affect the main sorting, but it helps us with subsorting. - */ - p = strstr(s, ".llvm."); - if (p) - *p = '\0'; -} - static int compare_names(const void *a, const void *b) { int ret; @@ -526,10 +504,6 @@ static void write_src(void) output_address(relative_base); printf("\n"); - if (lto_clang) - for (i = 0; i < table_cnt; i++) - cleanup_symbol_name((char *)table[i]->sym); - sort_symbols_by_name(); output_label("kallsyms_seqs_of_names"); for (i = 0; i < table_cnt; i++) @@ -807,7 +781,6 @@ int main(int argc, char **argv) static const struct option long_options[] = { {"all-symbols", no_argument, &all_symbols, 1}, {"absolute-percpu", no_argument, &absolute_percpu, 1}, - {"lto-clang", no_argument, <o_clang, 1}, {}, }; diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh index 902eb429b9db..0b7952471c18 100755 --- a/scripts/kconfig/merge_config.sh +++ b/scripts/kconfig/merge_config.sh @@ -167,6 +167,8 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do sed -i "/$CFG[ =]/d" $MERGE_FILE fi done + # In case the previous file lacks a new line at the end + echo >> $TMP_FILE cat $MERGE_FILE >> $TMP_FILE done diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index f7b2503cdba9..070a319140e8 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -156,10 +156,6 @@ kallsyms() kallsymopt="${kallsymopt} --absolute-percpu" fi - if is_enabled CONFIG_LTO_CLANG; then - kallsymopt="${kallsymopt} --lto-clang" - fi - info KSYMS "${2}.S" scripts/kallsyms ${kallsymopt} "${1}" > "${2}.S" @@ -219,7 +215,8 @@ kallsymso= strip_debug= if is_enabled CONFIG_KALLSYMS; then - kallsyms /dev/null .tmp_vmlinux0.kallsyms + truncate -s0 .tmp_vmlinux.kallsyms0.syms + kallsyms .tmp_vmlinux.kallsyms0.syms .tmp_vmlinux0.kallsyms fi if is_enabled CONFIG_KALLSYMS || is_enabled CONFIG_DEBUG_INFO_BTF; then diff --git a/security/keys/trusted-keys/trusted_dcp.c b/security/keys/trusted-keys/trusted_dcp.c index b5f81a05be36..4edc5bbbcda3 100644 --- a/security/keys/trusted-keys/trusted_dcp.c +++ b/security/keys/trusted-keys/trusted_dcp.c @@ -186,20 +186,21 @@ out: return ret; } -static int decrypt_blob_key(u8 *key) +static int decrypt_blob_key(u8 *encrypted_key, u8 *plain_key) { - return do_dcp_crypto(key, key, false); + return do_dcp_crypto(encrypted_key, plain_key, false); } -static int encrypt_blob_key(u8 *key) +static int encrypt_blob_key(u8 *plain_key, u8 *encrypted_key) { - return do_dcp_crypto(key, key, true); + return do_dcp_crypto(plain_key, encrypted_key, true); } static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob) { struct dcp_blob_fmt *b = (struct dcp_blob_fmt *)p->blob; int blen, ret; + u8 plain_blob_key[AES_KEYSIZE_128]; blen = calc_blob_len(p->key_len); if (blen > MAX_BLOB_SIZE) @@ -207,30 +208,36 @@ static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob) b->fmt_version = DCP_BLOB_VERSION; get_random_bytes(b->nonce, AES_KEYSIZE_128); - get_random_bytes(b->blob_key, AES_KEYSIZE_128); + get_random_bytes(plain_blob_key, AES_KEYSIZE_128); - ret = do_aead_crypto(p->key, b->payload, p->key_len, b->blob_key, + ret = do_aead_crypto(p->key, b->payload, p->key_len, plain_blob_key, b->nonce, true); if (ret) { pr_err("Unable to encrypt blob payload: %i\n", ret); - return ret; + goto out; } - ret = encrypt_blob_key(b->blob_key); + ret = encrypt_blob_key(plain_blob_key, b->blob_key); if (ret) { pr_err("Unable to encrypt blob key: %i\n", ret); - return ret; + goto out; } - b->payload_len = get_unaligned_le32(&p->key_len); + put_unaligned_le32(p->key_len, &b->payload_len); p->blob_len = blen; - return 0; + ret = 0; + +out: + memzero_explicit(plain_blob_key, sizeof(plain_blob_key)); + + return ret; } static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) { struct dcp_blob_fmt *b = (struct dcp_blob_fmt *)p->blob; int blen, ret; + u8 plain_blob_key[AES_KEYSIZE_128]; if (b->fmt_version != DCP_BLOB_VERSION) { pr_err("DCP blob has bad version: %i, expected %i\n", @@ -248,14 +255,14 @@ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) goto out; } - ret = decrypt_blob_key(b->blob_key); + ret = decrypt_blob_key(b->blob_key, plain_blob_key); if (ret) { pr_err("Unable to decrypt blob key: %i\n", ret); goto out; } ret = do_aead_crypto(b->payload, p->key, p->key_len + DCP_BLOB_AUTHLEN, - b->blob_key, b->nonce, false); + plain_blob_key, b->nonce, false); if (ret) { pr_err("Unwrap of DCP payload failed: %i\n", ret); goto out; @@ -263,6 +270,8 @@ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) ret = 0; out: + memzero_explicit(plain_blob_key, sizeof(plain_blob_key)); + return ret; } diff --git a/sound/core/timer.c b/sound/core/timer.c index d104adc75a8b..71a07c1662f5 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -547,7 +547,7 @@ static int snd_timer_start1(struct snd_timer_instance *timeri, /* check the actual time for the start tick; * bail out as error if it's way too low (< 100us) */ - if (start) { + if (start && !(timer->hw.flags & SNDRV_TIMER_HW_SLAVE)) { if ((u64)snd_timer_hw_resolution(timer) * ticks < 100000) return -EINVAL; } diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index 3a92e98da72d..d68bf7591d90 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -134,7 +134,7 @@ static const struct reg_sequence cs35l41_hda_mute[] = { }; static const struct cs_dsp_client_ops client_ops = { - .control_remove = hda_cs_dsp_control_remove, + /* cs_dsp requires the client to provide this even if it is empty */ }; static int cs35l41_request_tuning_param_file(struct cs35l41_hda *cs35l41, char *tuning_filename, diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 31cc92bac89a..a9dfd62637cf 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -413,7 +413,7 @@ static void cs35l56_hda_remove_controls(struct cs35l56_hda *cs35l56) } static const struct cs_dsp_client_ops cs35l56_hda_client_ops = { - .control_remove = hda_cs_dsp_control_remove, + /* cs_dsp requires the client to provide this even if it is empty */ }; static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 480e82df7a4c..d022a25635f9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11,15 +11,18 @@ */ #include <linux/acpi.h> +#include <linux/cleanup.h> #include <linux/init.h> #include <linux/delay.h> #include <linux/slab.h> #include <linux/pci.h> #include <linux/dmi.h> #include <linux/module.h> +#include <linux/i2c.h> #include <linux/input.h> #include <linux/leds.h> #include <linux/ctype.h> +#include <linux/spi/spi.h> #include <sound/core.h> #include <sound/jack.h> #include <sound/hda_codec.h> @@ -583,7 +586,6 @@ static void alc_shutup_pins(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0236: case 0x10ec0256: - case 0x10ec0257: case 0x19e58326: case 0x10ec0283: case 0x10ec0285: @@ -6856,6 +6858,86 @@ static void comp_generic_fixup(struct hda_codec *cdc, int action, const char *bu } } +static void cs35lxx_autodet_fixup(struct hda_codec *cdc, + const struct hda_fixup *fix, + int action) +{ + struct device *dev = hda_codec_dev(cdc); + struct acpi_device *adev; + struct fwnode_handle *fwnode __free(fwnode_handle) = NULL; + const char *bus = NULL; + static const struct { + const char *hid; + const char *name; + } acpi_ids[] = {{ "CSC3554", "cs35l54-hda" }, + { "CSC3556", "cs35l56-hda" }, + { "CSC3557", "cs35l57-hda" }}; + char *match; + int i, count = 0, count_devindex = 0; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + for (i = 0; i < ARRAY_SIZE(acpi_ids); ++i) { + adev = acpi_dev_get_first_match_dev(acpi_ids[i].hid, NULL, -1); + if (adev) + break; + } + if (!adev) { + dev_err(dev, "Failed to find ACPI entry for a Cirrus Amp\n"); + return; + } + + count = i2c_acpi_client_count(adev); + if (count > 0) { + bus = "i2c"; + } else { + count = acpi_spi_count_resources(adev); + if (count > 0) + bus = "spi"; + } + + fwnode = fwnode_handle_get(acpi_fwnode_handle(adev)); + acpi_dev_put(adev); + + if (!bus) { + dev_err(dev, "Did not find any buses for %s\n", acpi_ids[i].hid); + return; + } + + if (!fwnode) { + dev_err(dev, "Could not get fwnode for %s\n", acpi_ids[i].hid); + return; + } + + /* + * When available the cirrus,dev-index property is an accurate + * count of the amps in a system and is used in preference to + * the count of bus devices that can contain additional address + * alias entries. + */ + count_devindex = fwnode_property_count_u32(fwnode, "cirrus,dev-index"); + if (count_devindex > 0) + count = count_devindex; + + match = devm_kasprintf(dev, GFP_KERNEL, "-%%s:00-%s.%%d", acpi_ids[i].name); + if (!match) + return; + dev_info(dev, "Found %d %s on %s (%s)\n", count, acpi_ids[i].hid, bus, match); + comp_generic_fixup(cdc, action, bus, acpi_ids[i].hid, match, count); + + break; + case HDA_FIXUP_ACT_FREE: + /* + * Pass the action on to comp_generic_fixup() so that + * hda_component_manager functions can be called in just once + * place. In this context the bus, hid, match_str or count + * values do not need to be calculated. + */ + comp_generic_fixup(cdc, action, NULL, NULL, NULL, 0); + break; + } +} + static void cs35l41_fixup_i2c_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action) { comp_generic_fixup(cdc, action, "i2c", "CSC3551", "-%s:00-cs35l41-hda.%d", 2); @@ -7528,6 +7610,7 @@ enum { ALC256_FIXUP_CHROME_BOOK, ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7, ALC287_FIXUP_LENOVO_SSID_17AA3820, + ALCXXX_FIXUP_CS35LXX, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9857,6 +9940,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_lenovo_ssid_17aa3820, }, + [ALCXXX_FIXUP_CS35LXX] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35lxx_autodet_fixup, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -10271,6 +10358,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d08, "HP EliteBook 1045 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 1040 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d86, "HP Elite x360 1040 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8c, "HP EliteBook 830 13 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8d, "HP Elite x360 830 13 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8e, "HP EliteBook 840 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8f, "HP EliteBook 840 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d90, "HP EliteBook 860 16 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d91, "HP ZBook Firefly 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d92, "HP ZBook Firefly 16 G12", ALCXXX_FIXUP_CS35LXX), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 49bd7097d892..89d8235537cd 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -2,10 +2,12 @@ // // TAS2781 HDA I2C driver // -// Copyright 2023 Texas Instruments, Inc. +// Copyright 2023 - 2024 Texas Instruments, Inc. // // Author: Shenghao Ding <shenghao-ding@ti.com> +// Current maintainer: Baojun Xu <baojun.xu@ti.com> +#include <asm/unaligned.h> #include <linux/acpi.h> #include <linux/crc8.h> #include <linux/crc32.h> @@ -519,20 +521,22 @@ static void tas2781_apply_calib(struct tasdevice_priv *tas_priv) static const unsigned char rgno_array[CALIB_MAX] = { 0x74, 0x0c, 0x14, 0x70, 0x7c, }; - unsigned char *data; + int offset = 0; int i, j, rc; + __be32 data; for (i = 0; i < tas_priv->ndev; i++) { - data = tas_priv->cali_data.data + - i * TASDEVICE_SPEAKER_CALIBRATION_SIZE; for (j = 0; j < CALIB_MAX; j++) { + data = cpu_to_be32( + *(uint32_t *)&tas_priv->cali_data.data[offset]); rc = tasdevice_dev_bulk_write(tas_priv, i, TASDEVICE_REG(0, page_array[j], rgno_array[j]), - &(data[4 * j]), 4); + (unsigned char *)&data, 4); if (rc < 0) dev_err(tas_priv->dev, "chn %d calib %d bulk_wr err = %d\n", i, j, rc); + offset += 4; } } } diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 60d366e53526..6400ac875e6f 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -11,7 +11,6 @@ #include <linux/module.h> #include <linux/moduleparam.h> -#include <linux/version.h> #include <linux/types.h> #include <linux/init.h> #include <linux/delay.h> diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index f13a8d63a019..aaa6a515d0f8 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -273,6 +273,7 @@ YAMAHA_DEVICE(0x105a, NULL), YAMAHA_DEVICE(0x105b, NULL), YAMAHA_DEVICE(0x105c, NULL), YAMAHA_DEVICE(0x105d, NULL), +YAMAHA_DEVICE(0x1718, "P-125"), { USB_DEVICE(0x0499, 0x1503), .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index ea063a14cdd8..e7b68c67852e 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2221,6 +2221,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2d95, 0x8021, /* VIVO USB-C-XE710 HEADSET */ + QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 7b32b99023a2..5fd7caea4419 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -86,9 +86,14 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_CORTEX_X1C 0xD4C +#define ARM_CPU_PART_CORTEX_X3 0xD4E #define ARM_CPU_PART_NEOVERSE_V2 0xD4F +#define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 +#define ARM_CPU_PART_CORTEX_X925 0xD85 +#define ARM_CPU_PART_CORTEX_A725 0xD87 #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -162,9 +167,14 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) +#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) +#define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) +#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) +#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 1691297a766a..eaeda001784e 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -645,6 +645,9 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) #define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) #define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) +#define KVM_REG_PPC_DEXCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc6) +#define KVM_REG_PPC_HASHKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc7) +#define KVM_REG_PPC_HASHPKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc8) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 3c7434329661..dd4682857c12 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -18,170 +18,170 @@ /* * Note: If the comment begins with a quoted string, that string is used - * in /proc/cpuinfo instead of the macro name. If the string is "", - * this feature bit is not displayed in /proc/cpuinfo at all. + * in /proc/cpuinfo instead of the macro name. Otherwise, this feature + * bit is not displayed in /proc/cpuinfo at all. * * When adding new features here that depend on other features, * please update the table in kernel/cpu/cpuid-deps.c as well. */ /* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* "fpu" Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* "vme" Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* "de" Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* "pse" Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* "tsc" Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* "msr" Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* "pae" Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* "mce" Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* "cx8" CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* "apic" Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* "sep" SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* "mtrr" Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* "pge" Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* "mca" Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* "cmov" CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT ( 0*32+16) /* "pat" Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* "pse36" 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* "pn" Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* "clflush" CLFLUSH instruction */ #define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_ACPI ( 0*32+22) /* "acpi" ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* "mmx" Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* "fxsr" FXSAVE/FXRSTOR, CR4.OSFXSR */ #define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ #define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ #define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_HT ( 0*32+28) /* "ht" Hyper-Threading */ #define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ +#define X86_FEATURE_IA64 ( 0*32+30) /* "ia64" IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* "pbe" Pending Break Enable */ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* "syscall" SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* "mp" MP Capable */ +#define X86_FEATURE_NX ( 1*32+20) /* "nx" Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* "mmxext" AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* "fxsr_opt" FXSAVE/FXRSTOR optimizations */ #define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* "rdtscp" RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* "lm" Long Mode (x86-64, 64-bit support) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* "3dnowext" AMD 3DNow extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* "3dnow" 3DNow */ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* "recovery" CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* "longrun" Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* "lrti" LongRun table interface */ /* Other features, Linux-defined mapping, word 3 */ /* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ -#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ -#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */ -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ -#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */ -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* "cxmmx" Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* "k6_mtrr" AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* "cyrix_arr" Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ +#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ +#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */ +#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ +#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* "arch_perfmon" Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */ +#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */ +#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */ +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* "acc_power" AMD Accumulated Power Mechanism */ +#define X86_FEATURE_NOPL ( 3*32+20) /* "nopl" The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* "xtopology" CPU topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* "tsc_reliable" TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* "nonstop_tsc" TSC does not stop in C states */ +#define X86_FEATURE_CPUID ( 3*32+25) /* "cpuid" CPU has CPUID instruction itself */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* "extd_apicid" Extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* "amd_dcm" AMD multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* "aperfmperf" P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +#define X86_FEATURE_RAPL ( 3*32+29) /* "rapl" AMD/Hygon RAPL interface */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* "nonstop_tsc_s3" TSC doesn't stop in S3 state */ +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* "tsc_known_freq" TSC has known frequency */ /* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* "pclmulqdq" PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* "dtes64" 64-bit Debug Store */ #define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ #define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* "vmx" Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* "smx" Safer Mode eXtensions */ +#define X86_FEATURE_EST ( 4*32+ 7) /* "est" Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* "tm2" Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* "ssse3" Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* "cid" Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* "sdbg" Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* "fma" Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* "cx16" CMPXCHG16B instruction */ +#define X86_FEATURE_XTPR ( 4*32+14) /* "xtpr" Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* "pdcm" Perf/Debug Capabilities MSR */ +#define X86_FEATURE_PCID ( 4*32+17) /* "pcid" Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* "dca" Direct Cache Access */ #define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ #define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ -#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* "x2apic" X2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* "movbe" MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* "popcnt" POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* "tsc_deadline_timer" TSC deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* "aes" AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* "xsave" XSAVE/XRSTOR/XSETBV/XGETBV instructions */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* XSAVE instruction enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* "avx" Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* "f16c" 16-bit FP conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* "rdrand" RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* "hypervisor" Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ #define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ #define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ #define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* "ace2" Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* "ace2_en" ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* "phe" PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* "phe_en" PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* "pmm" PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* "pmm_en" PMM enabled */ /* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ -#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ -#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* "lahf_lm" LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* "cmp_legacy" If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* "svm" Secure Virtual Machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* "extapic" Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* "cr8_legacy" CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* "abm" Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* "sse4a" SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* "misalignsse" Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* "3dnowprefetch" 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* "osvw" OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* "ibs" Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* "xop" Extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* "skinit" SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* "wdt" Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* "lwp" Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* "fma4" 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* "tce" Translation Cache Extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* "nodeid_msr" NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* "tbm" Trailing Bit Manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* "topoext" Topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* "perfctr_core" Core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* "perfctr_nb" NB performance counter extensions */ +#define X86_FEATURE_BPEXT ( 6*32+26) /* "bpext" Data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* "ptsc" Performance time-stamp counter */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* "perfctr_llc" Last Level Cache performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* "mwaitx" MWAIT extension (MONITORX/MWAITX instructions) */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -189,93 +189,93 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ -#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */ -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ -#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ -#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ -#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ -#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */ -#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ -#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ -#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ -#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ -#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ -#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ -#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* "ring3mwait" Ring 3 MONITOR/MWAIT instructions */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* "cpuid_fault" Intel CPUID faulting */ +#define X86_FEATURE_CPB ( 7*32+ 2) /* "cpb" AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* "epb" IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* "cat_l3" Cache Allocation Technology L3 */ +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* "cat_l2" Cache Allocation Technology L2 */ +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* "cdp_l3" Code and Data Prioritization L3 */ +#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* "tdx_host_platform" Platform supports being a TDX host */ +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* "hw_pstate" AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* "proc_feedback" AMD ProcFeedbackInterface */ +#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* Use compacted XSTATE (XSAVES or XSAVEC) */ +#define X86_FEATURE_PTI ( 7*32+11) /* "pti" Kernel Page Table Isolation enabled */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* Fill RSB on VM-Exit */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* "intel_ppin" Intel Processor Inventory Number */ +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* "cdp_l2" Code and Data Prioritization L2 */ +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* "ssbd" Speculative Store Bypass Disable */ +#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */ +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */ +#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* "ibrs_enhanced" Enhanced IBRS */ +#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* MSR IA32_FEAT_CTL configured */ /* Virtualization flags: Linux defined, word 8 */ -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* "tpr_shadow" Intel TPR Shadow */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ -#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ -#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ -#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ -#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ -#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ -#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ +#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ +#define X86_FEATURE_EPT_AD ( 8*32+17) /* "ept_ad" Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMCALL ( 8*32+18) /* Hypervisor supports the VMCALL instruction */ +#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* PV vcpu_is_preempted function */ +#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ -#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */ -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ -#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */ -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ -#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */ -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ -#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ -#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ -#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ -#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* "tsc_adjust" TSC adjustment MSR 0x3B */ +#define X86_FEATURE_SGX ( 9*32+ 2) /* "sgx" Software Guard Extensions */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* "bmi1" 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* "hle" Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* "avx2" AVX2 instructions */ +#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* FPU data pointer updated only on x87 exceptions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* "smep" Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* "bmi2" 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* "erms" Enhanced REP MOVSB/STOSB instructions */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* "invpcid" Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* "rtm" Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* "cqm" Cache QoS Monitoring */ +#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* Zero out FPU CS and FPU DS */ +#define X86_FEATURE_MPX ( 9*32+14) /* "mpx" Memory Protection Extension */ +#define X86_FEATURE_RDT_A ( 9*32+15) /* "rdt_a" Resource Director Technology Allocation */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* "avx512f" AVX-512 Foundation */ +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* "avx512dq" AVX-512 DQ (Double/Quad granular) Instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* "rdseed" RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* "adx" ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* "smap" Supervisor Mode Access Prevention */ +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* "avx512ifma" AVX-512 Integer Fused Multiply-Add instructions */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* "clflushopt" CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* "clwb" CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* "intel_pt" Intel Processor Trace */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* "avx512pf" AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* "avx512er" AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* "avx512cd" AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* "sha_ni" SHA1/SHA256 Instruction Extensions */ +#define X86_FEATURE_AVX512BW ( 9*32+30) /* "avx512bw" AVX-512 BW (Byte/Word granular) Instructions */ +#define X86_FEATURE_AVX512VL ( 9*32+31) /* "avx512vl" AVX-512 VL (128/256 Vector Length) Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ -#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* "xsaveopt" XSAVEOPT instruction */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* "xsavec" XSAVEC instruction */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* "xgetbv1" XGETBV with ECX = 1 instruction */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* "xsaves" XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -283,181 +283,183 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */ -#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ -#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ -#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ -#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ -#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ -#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ -#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ -#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ -#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ -#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ -#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ -#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ -#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ -#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ -#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ -#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ -#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */ -#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */ -#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ -#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ -#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ -#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ -#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ -#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ -#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ -#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ -#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ -#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ -#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ -#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ +#define X86_FEATURE_CQM_LLC (11*32+ 0) /* "cqm_llc" LLC QoS if 1 */ +#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* "cqm_occup_llc" LLC occupancy monitoring */ +#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* "cqm_mbm_total" LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* "cqm_mbm_local" LLC Local MBM monitoring */ +#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* LFENCE in user entry SWAPGS path */ +#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* "split_lock_detect" #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* Use IBPB during runtime firmware calls */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* Fill RSB on VM exit when EIBRS is enabled */ +#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* SGX EDECCSSA user leaf function */ +#define X86_FEATURE_CALL_DEPTH (11*32+19) /* Call depth tracking for RSB stuffing */ +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */ +#define X86_FEATURE_SMBA (11*32+21) /* Slow Memory Bandwidth Allocation */ +#define X86_FEATURE_BMEC (11*32+22) /* Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_USER_SHSTK (11*32+23) /* "user_shstk" Shadow stack support for user mode applications */ +#define X86_FEATURE_SRSO (11*32+24) /* AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* Issue an IBPB only on VMEXIT */ +#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ +#define X86_FEATURE_ZEN2 (11*32+28) /* CPU based on Zen2 microarchitecture */ +#define X86_FEATURE_ZEN3 (11*32+29) /* CPU based on Zen3 microarchitecture */ +#define X86_FEATURE_ZEN4 (11*32+30) /* CPU based on Zen4 microarchitecture */ +#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ -#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ -#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ -#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ -#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */ -#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */ -#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */ -#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */ -#define X86_FEATURE_FRED (12*32+17) /* Flexible Return and Event Delivery */ -#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */ -#define X86_FEATURE_WRMSRNS (12*32+19) /* "" Non-serializing WRMSR */ -#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ -#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ -#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */ +#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */ +#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */ +#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */ +#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */ +#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */ +#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */ +#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */ +#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */ +#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */ +#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */ +#define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ -#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ -#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ -#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */ -#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */ -#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ -#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ -#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */ -#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ -#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ -#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ -#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ -#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */ -#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ -#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ +#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ +#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ +#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */ +#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ +#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */ +#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ -#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* "dtherm" Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* "ida" Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* "arat" Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* "pln" Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* "pts" Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* "hwp" Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* "hwp_notify" HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */ +#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* HWP Highest perf change */ +#define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_NPT (15*32+ 0) /* "npt" Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* "lbrv" LBR Virtualization support */ #define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ #define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ #define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ #define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ -#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */ -#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ -#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */ -#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* "flushbyasid" Flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* "decodeassists" Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* "pausefilter" Filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* "pfthreshold" Pause filter threshold */ +#define X86_FEATURE_AVIC (15*32+13) /* "avic" Virtual Interrupt Controller */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* "v_vmsave_vmload" Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* "vgif" Virtual GIF */ +#define X86_FEATURE_X2AVIC (15*32+18) /* "x2avic" Virtual x2apic */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ +#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ -#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ -#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ -#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ -#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */ -#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ -#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ -#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ -#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ -#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ -#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ -#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ -#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ -#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ -#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ -#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ -#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */ +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ +#define X86_FEATURE_UMIP (16*32+ 2) /* "umip" User Mode Instruction Protection */ +#define X86_FEATURE_PKU (16*32+ 3) /* "pku" Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (16*32+ 4) /* "ospke" OS Protection Keys Enable */ +#define X86_FEATURE_WAITPKG (16*32+ 5) /* "waitpkg" UMONITOR/UMWAIT/TPAUSE Instructions */ +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* "avx512_vbmi2" Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_SHSTK (16*32+ 7) /* Shadow stack */ +#define X86_FEATURE_GFNI (16*32+ 8) /* "gfni" Galois Field New Instructions */ +#define X86_FEATURE_VAES (16*32+ 9) /* "vaes" Vector AES */ +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* "vpclmulqdq" Carry-Less Multiplication Double Quadword */ +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* "avx512_vnni" Vector Neural Network Instructions */ +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* "avx512_bitalg" Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* "tme" Intel Total Memory Encryption */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* "avx512_vpopcntdq" POPCNT for vectors of DW/QW */ +#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* "rdpid" RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* "bus_lock_detect" Bus Lock detect */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* "cldemote" CLDEMOTE instruction */ +#define X86_FEATURE_MOVDIRI (16*32+27) /* "movdiri" MOVDIRI instruction */ +#define X86_FEATURE_MOVDIR64B (16*32+28) /* "movdir64b" MOVDIR64B instruction */ +#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */ +#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ -#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ -#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ -#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */ +#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ -#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */ -#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ -#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ -#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ -#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */ -#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ -#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ -#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ -#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ -#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ -#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ -#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */ -#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ -#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ -#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ -#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ -#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ -#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ -#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ -#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* "avx512_4vnniw" AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* "avx512_4fmaps" AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_FSRM (18*32+ 4) /* "fsrm" Fast Short Rep Mov */ +#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* "avx512_vp2intersect" AVX-512 Intersect for D/Q */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* SRBDS mitigation MSR available */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* "md_clear" VERW clears CPU buffers */ +#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* RTM transaction always aborts */ +#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* TSX_FORCE_ABORT */ +#define X86_FEATURE_SERIALIZE (18*32+14) /* "serialize" SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* This part has CPUs of more than one type */ +#define X86_FEATURE_TSXLDTRK (18*32+16) /* "tsxldtrk" TSX Suspend Load Address Tracking */ +#define X86_FEATURE_PCONFIG (18*32+18) /* "pconfig" Intel PCONFIG */ +#define X86_FEATURE_ARCH_LBR (18*32+19) /* "arch_lbr" Intel ARCH LBR */ +#define X86_FEATURE_IBT (18*32+20) /* "ibt" Indirect Branch Tracking */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* "amx_bf16" AMX bf16 Support */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* "avx512_fp16" AVX512 FP16 */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* "amx_tile" AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* "amx_int8" AMX int8 Support */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* "flush_l1d" Flush L1D cache */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* "arch_capabilities" IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* IA32_CORE_CAPABILITIES MSR */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ -#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ -#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ -#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ -#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_SEV_SNP (19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */ -#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ -#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ -#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ -#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */ -#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ -#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ -#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */ -#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ -#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ +#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ +#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ +#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ +#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ -#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ -#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ -#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ +#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ +#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -465,59 +467,60 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ -#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ -#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ -#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ -#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ /* * BUG word(s) */ #define X86_BUG(x) (NCAPINTS*32 + (x)) -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_F00F X86_BUG(0) /* "f00f" Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* "fdiv" FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* "coma" Cyrix 6x86 coma */ #define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ #define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#define X86_BUG_11AP X86_BUG(5) /* "11ap" Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* "fxsave_leak" FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* "clflush_monitor" AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* "sysret_ss_attrs" SYSRET doesn't fix up SS attrs */ #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional * to avoid confusion. */ -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ +#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ -#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ -#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ -#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ -#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ -#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ -#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ -#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ -#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ -#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ -#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ -#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ -#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ -#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ -#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ -#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ -#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ -#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */ +#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */ +#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSDBS variant of BUG_MDS */ +#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort(TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ +#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ +#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */ +#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ -#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ -#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ -#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ -#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ +#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index e022e6eb766c..82c6a4d350e0 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -566,6 +566,12 @@ #define MSR_RELOAD_PMC0 0x000014c1 #define MSR_RELOAD_FIXED_CTR0 0x00001309 +/* V6 PMON MSR range */ +#define MSR_IA32_PMC_V6_GP0_CTR 0x1900 +#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_STEP 4 + /* KeyID partitioning between MKTME and TDX */ #define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087 @@ -660,6 +666,8 @@ #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_SVSM_CAA 0xc001f000 + /* AMD Collaborative Processor Performance Control MSRs */ #define MSR_AMD_CPPC_CAP1 0xc00102b0 #define MSR_AMD_CPPC_ENABLE 0xc00102b1 @@ -781,6 +789,8 @@ #define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 +#define MSR_K7_HWCR_CPB_DIS_BIT 25 +#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT) /* K6 MSRs */ #define MSR_K6_WHCR 0xc0000082 @@ -1164,6 +1174,7 @@ #define MSR_IA32_QM_CTR 0xc8e #define MSR_IA32_PQR_ASSOC 0xc8f #define MSR_IA32_L3_CBM_BASE 0xc90 +#define MSR_RMID_SNC_CONFIG 0xca0 #define MSR_IA32_L2_CBM_BASE 0xd10 #define MSR_IA32_MBA_THRTL_BASE 0xd50 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 9fae1b73b529..bf57a824f722 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -106,6 +106,7 @@ struct kvm_ioapic_state { #define KVM_RUN_X86_SMM (1 << 0) #define KVM_RUN_X86_BUS_LOCK (1 << 1) +#define KVM_RUN_X86_GUEST_MODE (1 << 2) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { @@ -697,6 +698,11 @@ enum sev_cmd_id { /* Second time is the charm; improved versions of the above ioctls. */ KVM_SEV_INIT2, + /* SNP-specific commands */ + KVM_SEV_SNP_LAUNCH_START = 100, + KVM_SEV_SNP_LAUNCH_UPDATE, + KVM_SEV_SNP_LAUNCH_FINISH, + KVM_SEV_NR_MAX, }; @@ -824,6 +830,48 @@ struct kvm_sev_receive_update_data { __u32 pad2; }; +struct kvm_sev_snp_launch_start { + __u64 policy; + __u8 gosvw[16]; + __u16 flags; + __u8 pad0[6]; + __u64 pad1[4]; +}; + +/* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 +#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 +#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 +#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 +#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 + +struct kvm_sev_snp_launch_update { + __u64 gfn_start; + __u64 uaddr; + __u64 len; + __u8 type; + __u8 pad0; + __u16 flags; + __u32 pad1; + __u64 pad2[4]; +}; + +#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 +#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 +#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 + +struct kvm_sev_snp_launch_finish { + __u64 id_block_uaddr; + __u64 id_auth_uaddr; + __u8 id_block_en; + __u8 auth_key_en; + __u8 vcek_disabled; + __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; + __u8 pad0[3]; + __u16 flags; + __u64 pad1[4]; +}; + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) @@ -874,5 +922,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SW_PROTECTED_VM 1 #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 +#define KVM_X86_SNP_VM 4 #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 80e1df482337..1814b413fd57 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -115,6 +115,7 @@ #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ diff --git a/tools/include/uapi/README b/tools/include/uapi/README new file mode 100644 index 000000000000..7147b1b2cb28 --- /dev/null +++ b/tools/include/uapi/README @@ -0,0 +1,73 @@ +Why we want a copy of kernel headers in tools? +============================================== + +There used to be no copies, with tools/ code using kernel headers +directly. From time to time tools/perf/ broke due to legitimate kernel +hacking. At some point Linus complained about such direct usage. Then we +adopted the current model. + +The way these headers are used in perf are not restricted to just +including them to compile something. + +There are sometimes used in scripts that convert defines into string +tables, etc, so some change may break one of these scripts, or new MSRs +may use some different #define pattern, etc. + +E.g.: + + $ ls -1 tools/perf/trace/beauty/*.sh | head -5 + tools/perf/trace/beauty/arch_errno_names.sh + tools/perf/trace/beauty/drm_ioctl.sh + tools/perf/trace/beauty/fadvise.sh + tools/perf/trace/beauty/fsconfig.sh + tools/perf/trace/beauty/fsmount.sh + $ + $ tools/perf/trace/beauty/fadvise.sh + static const char *fadvise_advices[] = { + [0] = "NORMAL", + [1] = "RANDOM", + [2] = "SEQUENTIAL", + [3] = "WILLNEED", + [4] = "DONTNEED", + [5] = "NOREUSE", + }; + $ + +The tools/perf/check-headers.sh script, part of the tools/ build +process, points out changes in the original files. + +So its important not to touch the copies in tools/ when doing changes in +the original kernel headers, that will be done later, when +check-headers.sh inform about the change to the perf tools hackers. + +Another explanation from Ingo Molnar: +It's better than all the alternatives we tried so far: + + - Symbolic links and direct #includes: this was the original approach but + was pushed back on from the kernel side, when tooling modified the + headers and broke them accidentally for kernel builds. + + - Duplicate self-defined ABI headers like glibc: double the maintenance + burden, double the chance for mistakes, plus there's no tech-driven + notification mechanism to look at new kernel side changes. + +What we are doing now is a third option: + + - A software-enforced copy-on-write mechanism of kernel headers to + tooling, driven by non-fatal warnings on the tooling side build when + kernel headers get modified: + + Warning: Kernel ABI header differences: + diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h + diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h + diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h + ... + + The tooling policy is to always pick up the kernel side headers as-is, + and integate them into the tooling build. The warnings above serve as a + notification to tooling maintainers that there's changes on the kernel + side. + +We've been using this for many years now, and it might seem hacky, but +works surprisingly well. + diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index a00d53d02723..5bf6148cac2b 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64) #define __NR_ppoll_time64 414 __SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64) #define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64) #define __NR_recvmmsg_time64 417 __SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64) #define __NR_mq_timedsend_time64 418 diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index d4d86e566e07..535cb68fdb5c 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -2163,6 +2163,15 @@ struct drm_i915_gem_context_param { * supports this per context flag. */ #define I915_CONTEXT_PARAM_LOW_LATENCY 0xe + +/* + * I915_CONTEXT_PARAM_CONTEXT_IMAGE: + * + * Allows userspace to provide own context images. + * + * Note that this is a debug API not available on production kernel builds. + */ +#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf /* Must be kept compact -- no holes and well documented */ /** @value: Context parameter value to be set or queried */ @@ -2564,6 +2573,24 @@ struct i915_context_param_engines { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +struct i915_gem_context_param_context_image { + /** @engine: Engine class & instance to be configured. */ + struct i915_engine_class_instance engine; + + /** @flags: One of the supported flags or zero. */ + __u32 flags; +#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0) + + /** @size: Size of the image blob pointed to by @image. */ + __u32 size; + + /** @mbz: Must be zero. */ + __u32 mbz; + + /** @image: Userspace memory containing the context image. */ + __u64 image; +} __attribute__((packed)); + /** * struct drm_i915_gem_context_create_ext_setparam - Context parameter * to set or query during context creation. diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index e682ab628dfa..d358add1611c 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -81,6 +81,8 @@ enum { #define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_SMC = 256, /* Shared Memory Communications */ +#define IPPROTO_SMC IPPROTO_SMC IPPROTO_MPTCP = 262, /* Multipath TCP connection */ #define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index e5af8c692dc0..637efc055145 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -192,11 +192,24 @@ struct kvm_xen_exit { /* Flags that describe what fields in emulation_failure hold valid data. */ #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) +/* + * struct kvm_run can be modified by userspace at any time, so KVM must be + * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM() + * renames fields in struct kvm_run from <symbol> to <symbol>__unsafe when + * compiled into the kernel, ensuring that any use within KVM is obvious and + * gets extra scrutiny. + */ +#ifdef __KERNEL__ +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe +#else +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol +#endif + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 immediate_exit; + __u8 HINT_UNSAFE_IN_KVM(immediate_exit); __u8 padding1[6]; /* out */ @@ -918,6 +931,8 @@ struct kvm_enable_cap { #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 #define KVM_CAP_PRE_FAULT_MEMORY 236 +#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 +#define KVM_CAP_X86_GUEST_MODE 238 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 3a64499b0f5d..4842c36fdf80 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1349,12 +1349,14 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0x7 available */ +#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ +/* 0x7 available */ #define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ +#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ #define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ #define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 67626d535316..887a25286441 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 3656f1ca7a21..ebae8415dfbb 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -230,8 +230,10 @@ 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64 179 64 pread64 sys_pread64 +179 spu pread64 sys_pread64 180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64 180 64 pwrite64 sys_pwrite64 +180 spu pwrite64 sys_pwrite64 181 common chown sys_chown 182 common getcwd sys_getcwd 183 common capget sys_capget @@ -246,6 +248,7 @@ 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead 191 64 readahead sys_readahead +191 spu readahead sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64 194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64 @@ -293,6 +296,7 @@ 232 nospu set_tid_address sys_set_tid_address 233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 233 64 fadvise64 sys_fadvise64 +233 spu fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group 235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create @@ -502,7 +506,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index bd0fee24ad10..01071182763e 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -418,7 +418,7 @@ 412 32 utimensat_time64 - sys_utimensat 413 32 pselect6_time64 - compat_sys_pselect6_time64 414 32 ppoll_time64 - compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 - sys_io_pgetevents +416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 - sys_mq_timedsend 419 32 mq_timedreceive_time64 - sys_mq_timedreceive diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index a396f6e6ab5b..7093ee21c0d1 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -1,8 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # # 64-bit system call numbers and entry vectors # # The format is: -# <number> <abi> <name> <entry point> +# <number> <abi> <name> <entry point> [<compat entry point> [noreturn]] # # The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls # @@ -68,7 +69,7 @@ 57 common fork sys_fork 58 common vfork sys_vfork 59 64 execve sys_execve -60 common exit sys_exit +60 common exit sys_exit - noreturn 61 common wait4 sys_wait4 62 common kill sys_kill 63 common uname sys_newuname @@ -239,7 +240,7 @@ 228 common clock_gettime sys_clock_gettime 229 common clock_getres sys_clock_getres 230 common clock_nanosleep sys_clock_nanosleep -231 common exit_group sys_exit_group +231 common exit_group sys_exit_group - noreturn 232 common epoll_wait sys_epoll_wait 233 common epoll_ctl sys_epoll_ctl 234 common tgkill sys_tgkill @@ -343,6 +344,7 @@ 332 common statx sys_statx 333 common io_pgetevents sys_io_pgetevents 334 common rseq sys_rseq +335 common uretprobe sys_uretprobe # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal sys_pidfd_send_signal diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index de76bbc50bfb..5c9335fff2d3 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <internal/lib.h> +#include <inttypes.h> #include <subcmd/parse-options.h> #include <api/fd/array.h> #include <api/fs/fs.h> @@ -688,7 +689,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) /* lock */ csv_sep, daemon->base, "lock"); - fprintf(out, "%c%lu", + fprintf(out, "%c%" PRIu64, /* session up time */ csv_sep, (curr - daemon->start) / 60); @@ -700,7 +701,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) daemon->base, SESSION_OUTPUT); fprintf(out, " lock: %s/lock\n", daemon->base); - fprintf(out, " up: %lu minutes\n", + fprintf(out, " up: %" PRIu64 " minutes\n", (curr - daemon->start) / 60); } } @@ -727,7 +728,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) /* session ack */ csv_sep, session->base, SESSION_ACK); - fprintf(out, "%c%lu", + fprintf(out, "%c%" PRIu64, /* session up time */ csv_sep, (curr - session->start) / 60); @@ -745,7 +746,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) session->base, SESSION_CONTROL); fprintf(out, " ack: %s/%s\n", session->base, SESSION_ACK); - fprintf(out, " up: %lu minutes\n", + fprintf(out, " up: %" PRIu64 " minutes\n", (curr - session->start) / 60); } } diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 89d16b90370b..df9cdb8bbfb8 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -76,7 +76,7 @@ struct msghdr { __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ struct ubuf_info *msg_ubuf; - int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb, + int (*sg_from_iter)(struct sk_buff *skb, struct iov_iter *from, size_t length); }; @@ -442,11 +442,14 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, extern int __sys_socket(int family, int type, int protocol); extern struct file *__sys_socket_file(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); +extern int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address, + int addrlen); extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, int addrlen, int file_flags); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); +extern int __sys_listen_socket(struct socket *sock, int backlog); extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 45e4e64fd664..753971770733 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -329,12 +329,17 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO negation of O_APPEND */ #define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) +/* Atomic Write */ +#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC) + +#define PROCFS_IOCTL_MAGIC 'f' /* Pagemap ioctl */ -#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) +#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg) /* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */ #define PAGE_IS_WPALLOWED (1 << 0) @@ -393,4 +398,158 @@ struct pm_scan_arg { __u64 return_mask; }; +/* /proc/<pid>/maps ioctl */ +#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query) + +enum procmap_query_flags { + /* + * VMA permission flags. + * + * Can be used as part of procmap_query.query_flags field to look up + * only VMAs satisfying specified subset of permissions. E.g., specifying + * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs, + * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only + * return read/write VMAs, though both executable/non-executable and + * private/shared will be ignored. + * + * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags + * field to specify actual VMA permissions. + */ + PROCMAP_QUERY_VMA_READABLE = 0x01, + PROCMAP_QUERY_VMA_WRITABLE = 0x02, + PROCMAP_QUERY_VMA_EXECUTABLE = 0x04, + PROCMAP_QUERY_VMA_SHARED = 0x08, + /* + * Query modifier flags. + * + * By default VMA that covers provided address is returned, or -ENOENT + * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest + * VMA with vma_start > addr will be returned if no covering VMA is + * found. + * + * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that + * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA + * to iterate all VMAs with file backing. + */ + PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10, + PROCMAP_QUERY_FILE_BACKED_VMA = 0x20, +}; + +/* + * Input/output argument structured passed into ioctl() call. It can be used + * to query a set of VMAs (Virtual Memory Areas) of a process. + * + * Each field can be one of three kinds, marked in a short comment to the + * right of the field: + * - "in", input argument, user has to provide this value, kernel doesn't modify it; + * - "out", output argument, kernel sets this field with VMA data; + * - "in/out", input and output argument; user provides initial value (used + * to specify maximum allowable buffer size), and kernel sets it to actual + * amount of data written (or zero, if there is no data). + * + * If matching VMA is found (according to criterias specified by + * query_addr/query_flags, all the out fields are filled out, and ioctl() + * returns 0. If there is no matching VMA, -ENOENT will be returned. + * In case of any other error, negative error code other than -ENOENT is + * returned. + * + * Most of the data is similar to the one returned as text in /proc/<pid>/maps + * file, but procmap_query provides more querying flexibility. There are no + * consistency guarantees between subsequent ioctl() calls, but data returned + * for matched VMA is self-consistent. + */ +struct procmap_query { + /* Query struct size, for backwards/forward compatibility */ + __u64 size; + /* + * Query flags, a combination of enum procmap_query_flags values. + * Defines query filtering and behavior, see enum procmap_query_flags. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_flags; /* in */ + /* + * Query address. By default, VMA that covers this address will + * be looked up. PROCMAP_QUERY_* flags above modify this default + * behavior further. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_addr; /* in */ + /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */ + __u64 vma_start; /* out */ + __u64 vma_end; /* out */ + /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */ + __u64 vma_flags; /* out */ + /* VMA backing page size granularity. */ + __u64 vma_page_size; /* out */ + /* + * VMA file offset. If VMA has file backing, this specifies offset + * within the file that VMA's start address corresponds to. + * Is set to zero if VMA has no backing file. + */ + __u64 vma_offset; /* out */ + /* Backing file's inode number, or zero, if VMA has no backing file. */ + __u64 inode; /* out */ + /* Backing file's device major/minor number, or zero, if VMA has no backing file. */ + __u32 dev_major; /* out */ + __u32 dev_minor; /* out */ + /* + * If set to non-zero value, signals the request to return VMA name + * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix + * appended, if file was unlinked from FS) for matched VMA. VMA name + * can also be some special name (e.g., "[heap]", "[stack]") or could + * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME). + * + * Kernel will set this field to zero, if VMA has no associated name. + * Otherwise kernel will return actual amount of bytes filled in + * user-supplied buffer (see vma_name_addr field below), including the + * terminating zero. + * + * If VMA name is longer that user-supplied maximum buffer size, + * -E2BIG error is returned. + * + * If this field is set to non-zero value, vma_name_addr should point + * to valid user space memory buffer of at least vma_name_size bytes. + * If set to zero, vma_name_addr should be set to zero as well + */ + __u32 vma_name_size; /* in/out */ + /* + * If set to non-zero value, signals the request to extract and return + * VMA's backing file's build ID, if the backing file is an ELF file + * and it contains embedded build ID. + * + * Kernel will set this field to zero, if VMA has no backing file, + * backing file is not an ELF file, or ELF file has no build ID + * embedded. + * + * Build ID is a binary value (not a string). Kernel will set + * build_id_size field to exact number of bytes used for build ID. + * If build ID is requested and present, but needs more bytes than + * user-supplied maximum buffer size (see build_id_addr field below), + * -E2BIG error will be returned. + * + * If this field is set to non-zero value, build_id_addr should point + * to valid user space memory buffer of at least build_id_size bytes. + * If set to zero, build_id_addr should be set to zero as well + */ + __u32 build_id_size; /* in/out */ + /* + * User-supplied address of a buffer of at least vma_name_size bytes + * for kernel to fill with matched VMA's name (see vma_name_size field + * description above for details). + * + * Should be set to zero if VMA name should not be returned. + */ + __u64 vma_name_addr; /* in */ + /* + * User-supplied address of a buffer of at least build_id_size bytes + * for kernel to fill with matched VMA's ELF build ID, if available + * (see build_id_size field description above for details). + * + * Should be set to zero if build ID should not be returned. + */ + __u64 build_id_addr; /* in */ +}; + #endif /* _UAPI_LINUX_FS_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h index ad5478dbad00..225bc366ffcb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/mount.h +++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 __spare1; + __u32 mnt_opts; /* [str] Mount options of the mount */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -172,7 +172,8 @@ struct statmount { __u64 propagate_from; /* Propagation from in current namespace */ __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ - __u64 __spare2[50]; + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u64 __spare2[49]; char str[]; /* Variable size part containing strings */ }; @@ -188,10 +189,12 @@ struct mnt_id_req { __u32 spare; __u64 mnt_id; __u64 param; + __u64 mnt_ns_id; }; /* List of all mnt_id_req versions. */ #define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ /* * @mask bits for statmount(2) @@ -202,10 +205,13 @@ struct mnt_id_req { #define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ /* * Special @mnt_id values that can be passed to listmount */ #define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h index 67626d535316..887a25286441 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/stat.h +++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h index 628d46a0da92..8bf7e8a0eb6f 100644 --- a/tools/perf/trace/beauty/include/uapi/sound/asound.h +++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h @@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 18) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; @@ -334,7 +334,7 @@ union snd_pcm_sync_id { unsigned char id[16]; unsigned short id16[8]; unsigned int id32[4]; -}; +} __attribute__((deprecated)); struct snd_pcm_info { unsigned int device; /* RO/WR (control): device number */ @@ -348,7 +348,7 @@ struct snd_pcm_info { int dev_subclass; /* SNDRV_PCM_SUBCLASS_* */ unsigned int subdevices_count; unsigned int subdevices_avail; - union snd_pcm_sync_id sync; /* hardware synchronization ID */ + unsigned char pad1[16]; /* was: hardware synchronization ID */ unsigned char reserved[64]; /* reserved for future... */ }; @@ -420,7 +420,8 @@ struct snd_pcm_hw_params { unsigned int rate_num; /* R: rate numerator */ unsigned int rate_den; /* R: rate denominator */ snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */ - unsigned char reserved[64]; /* reserved for future */ + unsigned char sync[16]; /* R: synchronization ID (perfect sync - one clock source) */ + unsigned char reserved[48]; /* reserved for future */ }; enum { diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 030b388800f0..3d1ca9e38b1f 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -14,6 +14,7 @@ ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat +ldflags-y += --wrap=cxl_setup_parent_dport DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 6f737941dc0e..d619672faa49 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -299,6 +299,18 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL); +void __wrap_cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (!ops || !ops->is_mock_port(dport->dport_dev)) + cxl_setup_parent_dport(host, dport); + + put_cxl_mock_ops(index); +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_setup_parent_dport, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh index 877cd6df94a1..fe905a7f34b3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh @@ -2,6 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 lib_dir=$(dirname $0)/../../../net/forwarding +ethtool_lib_dir=$(dirname $0)/../hw ALL_TESTS=" autoneg @@ -11,7 +12,7 @@ ALL_TESTS=" NUM_NETIFS=2 : ${TIMEOUT:=30000} # ms source $lib_dir/lib.sh -source $lib_dir/ethtool_lib.sh +source $ethtool_lib_dir/ethtool_lib.sh setup_prepare() { diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 7b8a5def54a1..cfad627e8d94 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -53,7 +53,9 @@ TEST_GEN_FILES += madv_populate TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate +ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64)) TEST_GEN_FILES += memfd_secret +endif TEST_GEN_FILES += migration TEST_GEN_FILES += mkdirty TEST_GEN_FILES += mlock-random-test diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index e140558e6f53..2c3a0eb6b22d 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -89,9 +89,10 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, int fd, ret = -1; int compaction_index = 0; char nr_hugepages[20] = {0}; - char init_nr_hugepages[20] = {0}; + char init_nr_hugepages[24] = {0}; - sprintf(init_nr_hugepages, "%lu", initial_nr_hugepages); + snprintf(init_nr_hugepages, sizeof(init_nr_hugepages), + "%lu", initial_nr_hugepages); /* We want to test with 80% of available memory. Else, OOM killer comes in to play */ diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 03ac4f2e1cce..36045edb10de 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -374,8 +374,11 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +if [ -x ./memfd_secret ] +then (echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret +fi # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 CATEGORY="ksm" run_test ./ksm_tests -H -s 100 diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 64bd00fe9a4f..90f8a244ea90 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -142,6 +142,58 @@ extern_learn() bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null } +other_tpid() +{ + local mac=de:ad:be:ef:13:37 + + # Test that packets with TPID 802.1ad VID 3 + TPID 802.1Q VID 5 are + # classified as untagged by a bridge with vlan_protocol 802.1Q, and + # are processed in the PVID of the ingress port (here 1). Not VID 3, + # and not VID 5. + RET=0 + + tc qdisc add dev $h2 clsact + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + ip link set $h2 promisc on + ethtool -K $h2 rx-vlan-filter off rx-vlan-stag-filter off + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + # Match on 'self' addresses as well, for those drivers which + # do not push their learned addresses to the bridge software + # database + bridge -j fdb show $swp1 | \ + jq -e ".[] | select(.mac == \"$(mac_get $h1)\") | select(.vlan == 1)" &> /dev/null + check_err $? "FDB entry was not learned when it should" + + log_test "FDB entry in PVID for VLAN-tagged with other TPID" + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was not forwarded when it should" + log_test "Reception of VLAN with other TPID as untagged" + + bridge vlan del dev $swp1 vid 1 + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was forwarded when should not" + log_test "Reception of VLAN with other TPID as untagged (no PVID)" + + bridge vlan add dev $swp1 vid 1 pvid untagged + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index cb0fcd6f0293..c992e385159c 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -500,6 +500,11 @@ check_err_fail() fi } +xfail() +{ + FAIL_TO_XFAIL=yes "$@" +} + xfail_on_slow() { if [[ $KSFT_MACHINE_SLOW = yes ]]; then @@ -1120,6 +1125,39 @@ mac_get() ip -j link show dev $if_name | jq -r '.[]["address"]' } +ether_addr_to_u64() +{ + local addr="$1" + local order="$((1 << 40))" + local val=0 + local byte + + addr="${addr//:/ }" + + for byte in $addr; do + byte="0x$byte" + val=$((val + order * byte)) + order=$((order >> 8)) + done + + printf "0x%x" $val +} + +u64_to_ether_addr() +{ + local val=$1 + local byte + local i + + for ((i = 40; i >= 0; i -= 8)); do + byte=$(((val & (0xff << i)) >> i)) + printf "%02x" $byte + if [ $i -ne 0 ]; then + printf ":" + fi + done +} + ipv6_lladdr_get() { local if_name=$1 @@ -2236,3 +2274,22 @@ absval() echo $((v > 0 ? v : -v)) } + +has_unicast_flt() +{ + local dev=$1; shift + local mac_addr=$(mac_get $dev) + local tmp=$(ether_addr_to_u64 $mac_addr) + local promisc + + ip link set $dev up + ip link add link $dev name macvlan-tmp type macvlan mode private + ip link set macvlan-tmp address $(u64_to_ether_addr $((tmp + 1))) + ip link set macvlan-tmp up + + promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity') + + ip link del macvlan-tmp + + [[ $promisc == 1 ]] && echo "no" || echo "yes" +} diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 4b364cdf3ef0..648868f74604 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -1,7 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="standalone bridge" +ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \ + vlan_over_vlan_unaware_bridged_port vlan_over_vlan_aware_bridged_port \ + vlan_over_vlan_unaware_bridge vlan_over_vlan_aware_bridge" NUM_NETIFS=2 PING_COUNT=1 REQUIRE_MTOOLS=yes @@ -37,9 +39,68 @@ UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05" UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06" UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07" -NON_IP_MC="01:02:03:04:05:06" -NON_IP_PKT="00:04 48:45:4c:4f" -BC="ff:ff:ff:ff:ff:ff" +PTP_1588_L2_SYNC=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00" +PTP_1588_L2_FOLLOW_UP=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c5 f1 17 97 ed f0" +PTP_1588_L2_PDELAY_REQ=" \ +01:80:c2:00:00:0e 00:00:de:ad:be:ef 88:f7 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 06 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00" +PTP_1588_IPV4_SYNC=" \ +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9a 40 00 01 11 cb 88 c0 00 02 01 e0 00 \ +01 81 01 3f 01 3f 00 34 a3 c8 00 02 00 2c 00 00 \ +02 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" +PTP_1588_IPV4_FOLLOW_UP=" +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9b 40 00 01 11 cb 87 c0 00 02 01 e0 00 \ +01 81 01 40 01 40 00 34 a3 c8 08 02 00 2c 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 02 00 00 00 66 83 \ +c6 0f 1d 9a 61 87" +PTP_1588_IPV4_PDELAY_REQ=" \ +01:00:5e:00:00:6b 00:00:de:ad:be:ef 08:00 45 00 \ +00 52 35 a9 40 00 01 11 a1 85 c0 00 02 01 e0 00 \ +00 6b 01 3f 01 3f 00 3e a2 bc 02 02 00 36 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 01 05 7f 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_SYNC=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 06 \ +7c 2f 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 3f 01 3f 00 36 2e 92 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_FOLLOW_UP=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 0a \ +00 bc 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 40 01 40 00 36 2e 92 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c6 2a 32 09 bd 74 00 00" +PTP_1588_IPV6_PDELAY_REQ=" \ +33:33:00:00:00:6b 00:00:de:ad:be:ef 86:dd 60 0c \ +5c fd 00 40 11 01 fe 80 00 00 00 00 00 00 3c 37 \ +63 ff fe cf 17 0e ff 02 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 6b 01 3f 01 3f 00 40 b4 54 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" # Disable promisc to ensure we don't receive unknown MAC DA packets export TCPDUMP_EXTRA_FLAGS="-pl" @@ -47,13 +108,15 @@ export TCPDUMP_EXTRA_FLAGS="-pl" h1=${NETIFS[p1]} h2=${NETIFS[p2]} -send_non_ip() +send_raw() { - local if_name=$1 - local smac=$2 - local dmac=$3 + local if_name=$1; shift + local pkt="$1"; shift + local smac=$(mac_get $if_name) + + pkt="${pkt/00:00:de:ad:be:ef/$smac}" - $MZ -q $if_name "$dmac $smac $NON_IP_PKT" + $MZ -q $if_name "$pkt" } send_uc_ipv4() @@ -68,10 +131,11 @@ send_uc_ipv4() check_rcv() { - local if_name=$1 - local type=$2 - local pattern=$3 - local should_receive=$4 + local if_name=$1; shift + local type=$1; shift + local pattern=$1; shift + local should_receive=$1; shift + local test_name="$1"; shift local should_fail= [ $should_receive = true ] && should_fail=0 || should_fail=1 @@ -81,7 +145,7 @@ check_rcv() check_err_fail "$should_fail" "$?" "reception" - log_test "$if_name: $type" + log_test "$test_name: $type" } mc_route_prepare() @@ -104,44 +168,78 @@ mc_route_destroy() run_test() { - local rcv_if_name=$1 - local smac=$(mac_get $h1) + local send_if_name=$1; shift + local rcv_if_name=$1; shift + local skip_ptp=$1; shift + local no_unicast_flt=$1; shift + local test_name="$1"; shift + local smac=$(mac_get $send_if_name) local rcv_dmac=$(mac_get $rcv_if_name) + local should_receive tcpdump_start $rcv_if_name - mc_route_prepare $h1 + mc_route_prepare $send_if_name mc_route_prepare $rcv_if_name - send_uc_ipv4 $h1 $rcv_dmac - send_uc_ipv4 $h1 $MACVLAN_ADDR - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1 + send_uc_ipv4 $send_if_name $rcv_dmac + send_uc_ipv4 $send_if_name $MACVLAN_ADDR + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR1 ip link set dev $rcv_if_name promisc on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR2 ip link set dev $rcv_if_name promisc off mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR - mc_send $h1 $JOINED_IPV4_MC_ADDR + mc_send $send_if_name $JOINED_IPV4_MC_ADDR mc_leave mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR - mc_send $h1 $JOINED_IPV6_MC_ADDR + mc_send $send_if_name $JOINED_IPV6_MC_ADDR mc_leave - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR1 ip link set dev $rcv_if_name allmulticast on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR3 ip link set dev $rcv_if_name allmulticast off mc_route_destroy $rcv_if_name - mc_route_destroy $h1 + mc_route_destroy $send_if_name + + if [ $skip_ptp = false ]; then + ip maddress add 01:1b:19:00:00:00 dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_SYNC" + send_raw $send_if_name "$PTP_1588_L2_FOLLOW_UP" + ip maddress del 01:1b:19:00:00:00 dev $rcv_if_name + + ip maddress add 01:80:c2:00:00:0e dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_PDELAY_REQ" + ip maddress del 01:80:c2:00:00:0e dev $rcv_if_name + + mc_join $rcv_if_name 224.0.1.129 + send_raw $send_if_name "$PTP_1588_IPV4_SYNC" + send_raw $send_if_name "$PTP_1588_IPV4_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name 224.0.0.107 + send_raw $send_if_name "$PTP_1588_IPV4_PDELAY_REQ" + mc_leave + + mc_join $rcv_if_name ff0e::181 + send_raw $send_if_name "$PTP_1588_IPV6_SYNC" + send_raw $send_if_name "$PTP_1588_IPV6_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name ff02::6b + send_raw $send_if_name "$PTP_1588_IPV6_PDELAY_REQ" + mc_leave + fi sleep 1 @@ -149,61 +247,99 @@ run_test() check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \ "$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \ "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ - check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ - "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ - false + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ + "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ + $should_receive "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \ "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ - check_rcv $rcv_if_name \ - "Unicast IPv4 to unknown MAC address, allmulti" \ - "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ - false + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv $rcv_if_name \ + "Unicast IPv4 to unknown MAC address, allmulti" \ + "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ + $should_receive "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to joined group" \ "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ + xfail \ check_rcv $rcv_if_name \ "Multicast IPv4 to unknown group" \ "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to joined group" \ "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" - xfail_on_veth $h1 \ + xfail \ check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" + + if [ $skip_ptp = false ]; then + check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + fi tcpdump_cleanup $rcv_if_name } @@ -228,57 +364,208 @@ h2_destroy() simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 } +h1_vlan_create() +{ + simple_if_init $h1 + vlan_create $h1 100 v$h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_vlan_destroy() +{ + vlan_destroy $h1 100 + simple_if_fini $h1 +} + +h2_vlan_create() +{ + simple_if_init $h2 + vlan_create $h2 100 v$h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_vlan_destroy() +{ + vlan_destroy $h2 100 + simple_if_fini $h2 +} + bridge_create() { - ip link add br0 type bridge + local vlan_filtering=$1 + + ip link add br0 type bridge vlan_filtering $vlan_filtering ip link set br0 address $BRIDGE_ADDR ip link set br0 up ip link set $h2 master br0 ip link set $h2 up - - simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 } bridge_destroy() { - simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 - ip link del br0 } -standalone() +macvlan_create() { - h1_create - h2_create + local lower=$1 - ip link add link $h2 name macvlan0 type macvlan mode private + ip link add link $lower name macvlan0 type macvlan mode private ip link set macvlan0 address $MACVLAN_ADDR ip link set macvlan0 up +} - run_test $h2 - +macvlan_destroy() +{ ip link del macvlan0 +} + +standalone() +{ + local no_unicast_flt=true + local skip_ptp=false + + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + + h1_create + h2_create + macvlan_create $h2 + + run_test $h1 $h2 $skip_ptp $no_unicast_flt "$h2" + macvlan_destroy h2_destroy h1_destroy } -bridge() +test_bridge() { + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=true + h1_create - bridge_create + bridge_create $vlan_filtering + simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 + macvlan_create br0 - ip link add link br0 name macvlan0 type macvlan mode private - ip link set macvlan0 address $MACVLAN_ADDR - ip link set macvlan0 up + run_test $h1 br0 $skip_ptp $no_unicast_flt \ + "vlan_filtering=$vlan_filtering bridge" - run_test br0 + macvlan_destroy + simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 + bridge_destroy + h1_destroy +} - ip link del macvlan0 +vlan_unaware_bridge() +{ + test_bridge 0 +} + +vlan_aware_bridge() +{ + test_bridge 1 +} + +test_vlan() +{ + local no_unicast_flt=true + local skip_ptp=false + + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + h1_vlan_create + h2_vlan_create + macvlan_create $h2.100 + + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt "VLAN upper" + + macvlan_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_bridged_port() +{ + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=false + + # br_manage_promisc() will not force a single vlan_filtering port to + # promiscuous mode, so we should still expect unicast filtering to take + # place if the device can do it. + if [ $(has_unicast_flt $h2) = yes ] && [ $vlan_filtering = 1 ]; then + no_unicast_flt=false + fi + + h1_vlan_create + h2_vlan_create + bridge_create $vlan_filtering + macvlan_create $h2.100 + + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridged port" + + macvlan_destroy bridge_destroy - h1_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridged_port() +{ + vlan_over_bridged_port 0 +} + +vlan_over_vlan_aware_bridged_port() +{ + vlan_over_bridged_port 1 +} + +vlan_over_bridge() +{ + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=true + + h1_vlan_create + bridge_create $vlan_filtering + simple_if_init br0 + vlan_create br0 100 vbr0 $H2_IPV4/24 $H2_IPV6/64 + macvlan_create br0.100 + + if [ $vlan_filtering = 1 ]; then + bridge vlan add dev $h2 vid 100 master + bridge vlan add dev br0 vid 100 self + fi + + run_test $h1.100 br0.100 $skip_ptp $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridge" + + if [ $vlan_filtering = 1 ]; then + bridge vlan del dev br0 vid 100 self + bridge vlan del dev $h2 vid 100 master + fi + + macvlan_destroy + vlan_destroy br0 100 + simple_if_fini br0 + bridge_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridge() +{ + vlan_over_bridge 0 +} + +vlan_over_vlan_aware_bridge() +{ + vlan_over_bridge 1 } cleanup() diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9ea6d698e9d3..89e553e0e0c2 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -436,9 +436,10 @@ reset_with_tcp_filter() local ns="${!1}" local src="${2}" local target="${3}" + local chain="${4:-INPUT}" if ! ip netns exec "${ns}" ${iptables} \ - -A INPUT \ + -A "${chain}" \ -s "${src}" \ -p tcp \ -j "${target}"; then @@ -3058,6 +3059,7 @@ fullmesh_tests() pm_nl_set_limits $ns1 1 3 pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh fullmesh=1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -3571,10 +3573,10 @@ endpoint_tests() mptcp_lib_kill_wait $tests_pid fi - if reset "delete and re-add" && + if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then - pm_nl_set_limits $ns1 1 1 - pm_nl_set_limits $ns2 1 1 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow test_linkfail=4 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & @@ -3591,19 +3593,37 @@ endpoint_tests() chk_subflow_nr "after delete" 1 chk_mptcp_info subflows 0 subflows 0 - pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow wait_mpj $ns2 chk_subflow_nr "after re-add" 2 chk_mptcp_info subflows 1 subflows 1 + + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_attempt_fail $ns2 + chk_subflow_nr "after new reject" 2 + chk_mptcp_info subflows 1 subflows 1 + + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_del_endpoint $ns2 3 10.0.3.2 + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after no reject" 3 + chk_mptcp_info subflows 2 subflows 2 + mptcp_lib_kill_wait $tests_pid + + chk_join_nr 3 3 3 + chk_rm_nr 1 1 fi # remove and re-add if reset "delete re-add signal" && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then - pm_nl_set_limits $ns1 1 1 - pm_nl_set_limits $ns2 1 1 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal test_linkfail=4 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & local tests_pid=$! @@ -3615,17 +3635,53 @@ endpoint_tests() chk_mptcp_info subflows 1 subflows 1 pm_nl_del_endpoint $ns1 1 10.0.2.1 + pm_nl_del_endpoint $ns1 2 224.0.0.1 sleep 0.5 chk_subflow_nr "after delete" 1 chk_mptcp_info subflows 0 subflows 0 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal wait_mpj $ns2 - chk_subflow_nr "after re-add" 2 - chk_mptcp_info subflows 1 subflows 1 + chk_subflow_nr "after re-add" 3 + chk_mptcp_info subflows 2 subflows 2 mptcp_lib_kill_wait $tests_pid + + chk_join_nr 3 3 3 + chk_add_nr 4 4 + chk_rm_nr 2 1 invert fi + # flush and re-add + if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + test_linkfail=4 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + + wait_attempt_fail $ns2 + chk_subflow_nr "before flush" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_flush_endpoint $ns2 + pm_nl_flush_endpoint $ns1 + wait_rm_addr $ns2 0 + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal + wait_mpj $ns2 + mptcp_lib_kill_wait $tests_pid + + chk_join_nr 2 2 2 + chk_add_nr 2 2 + chk_rm_nr 1 0 invert + fi } # [$1: error message] diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 11a1ebda564f..d5ffd8c9172e 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -7,8 +7,6 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" - # set global exit status, but never reset nonzero one. check_err() { @@ -38,7 +36,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp + ip netns exec "${PEER_NS}" ethtool -K veth1 gro on } run_one() { @@ -46,17 +44,19 @@ run_one() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} & + local PID1=$! wait_local_port_listen ${PEER_NS} 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -73,6 +73,7 @@ run_one_nat() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 if [[ ${tx_args} = *-4* ]]; then ipt_cmd=iptables @@ -93,16 +94,17 @@ run_one_nat() { # ... so that GRO will match the UDP_GRO enabled socket, but packets # will land on the 'plain' one ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & - pid=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \ - echo "ok" || \ - echo "failed"& + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - kill -INT $pid - wait $(jobs -p) + check_err $? + kill -INT ${PID1} + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -111,20 +113,26 @@ run_one_2sock() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 12345 udp ./udpgso_bench_tx ${tx_args} -p 12345 + check_err $? wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -196,11 +204,6 @@ run_all() { return $ret } -if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Run 'make' first" - exit -1 -fi - if [[ $# -eq 0 ]]; then run_all elif [[ $1 == "__subprocess" ]]; then diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index ee349187636f..4f255cec0c22 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -143,7 +143,6 @@ class PluginMgr: except Exception as ee: print('exception {} in call to pre_case for {} plugin'. format(ee, pgn_inst.__class__)) - print('test_ordinal is {}'.format(test_ordinal)) print('testid is {}'.format(caseinfo['id'])) raise diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index f594a44df840..2f756628613d 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -651,8 +651,10 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) return NULL; tool->data = osnoise_alloc_top(nr_cpus); - if (!tool->data) - goto out_err; + if (!tool->data) { + osnoise_destroy_tool(tool); + return NULL; + } tool->params = params; @@ -660,11 +662,6 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) osnoise_top_handler, NULL); return tool; - -out_err: - osnoise_free_top(tool->data); - osnoise_destroy_tool(tool); - return NULL; } static int stop_tracing; |