diff options
334 files changed, 8232 insertions, 3929 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events index 20979f8b3edb..505f080d20a1 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events @@ -52,12 +52,18 @@ Description: Per-pmu performance monitoring events specific to the running syste event=0x2abc event=0x423,inv,cmask=0x3 domain=0x1,offset=0x8,starting_index=0xffff + domain=0x1,offset=0x8,core=? Each of the assignments indicates a value to be assigned to a particular set of bits (as defined by the format file corresponding to the <term>) in the perf_event structure passed to the perf_open syscall. + In the case of the last example, a value replacing "?" would + need to be provided by the user selecting the particular event. + This is referred to as "event parameterization". Event + parameters have the format 'param=?'. + What: /sys/bus/event_source/devices/<pmu>/events/<event>.unit Date: 2014/02/24 Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index ed186a902d31..b57c0c1cdac6 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -15,7 +15,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT 21 seconds. This configuration parameter may be changed at runtime via the - /sys/module/rcutree/parameters/rcu_cpu_stall_timeout, however + /sys/module/rcupdate/parameters/rcu_cpu_stall_timeout, however this parameter is checked only at the beginning of a cycle. So if you are 10 seconds into a 40-second stall, setting this sysfs parameter to (say) five will shorten the timeout for the @@ -152,6 +152,15 @@ no non-lazy callbacks ("." is printed otherwise, as shown above) and "D" indicates that dyntick-idle processing is enabled ("." is printed otherwise, for example, if disabled via the "nohz=" kernel boot parameter). +If the relevant grace-period kthread has been unable to run prior to +the stall warning, the following additional line is printed: + + rcu_preempt kthread starved for 2023 jiffies! + +Starving the grace-period kthreads of CPU time can of course result in +RCU CPU stall warnings even when all CPUs and tasks have passed through +the required quiescent states. + Multiple Warnings From One Stall @@ -187,6 +196,11 @@ o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the behavior, you might need to replace some of the cond_resched() calls with calls to cond_resched_rcu_qs(). +o Anything that prevents RCU's grace-period kthreads from running. + This can result in the "All QSes seen" console-log message. + This message will include information on when the kthread last + ran and how often it should be expected to run. + o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might happen to preempt a low-priority task in the middle of an RCU read-side critical section. This is especially damaging if diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index b63b9bb3bc0c..08651da15448 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -56,14 +56,14 @@ rcuboost: The output of "cat rcu/rcu_preempt/rcudata" looks as follows: - 0!c=30455 g=30456 pq=1 qp=1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716 - 1!c=30719 g=30720 pq=1 qp=0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982 - 2!c=30150 g=30151 pq=1 qp=1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458 - 3 c=31249 g=31250 pq=1 qp=0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622 - 4!c=29502 g=29503 pq=1 qp=1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521 - 5 c=31201 g=31202 pq=1 qp=1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698 - 6!c=30253 g=30254 pq=1 qp=1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353 - 7 c=31178 g=31178 pq=1 qp=0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969 + 0!c=30455 g=30456 pq=1/0 qp=1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716 + 1!c=30719 g=30720 pq=1/0 qp=0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982 + 2!c=30150 g=30151 pq=1/1 qp=1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458 + 3 c=31249 g=31250 pq=1/1 qp=0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622 + 4!c=29502 g=29503 pq=1/0 qp=1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521 + 5 c=31201 g=31202 pq=1/0 qp=1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698 + 6!c=30253 g=30254 pq=1/0 qp=1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353 + 7 c=31178 g=31178 pq=1/0 qp=0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969 This file has one line per CPU, or eight for this 8-CPU system. The fields are as follows: @@ -188,14 +188,14 @@ o "ca" is the number of RCU callbacks that have been adopted by this Kernels compiled with CONFIG_RCU_BOOST=y display the following from /debug/rcu/rcu_preempt/rcudata: - 0!c=12865 g=12866 pq=1 qp=1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871 - 1 c=14407 g=14408 pq=1 qp=0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485 - 2 c=14407 g=14408 pq=1 qp=0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490 - 3 c=14407 g=14408 pq=1 qp=0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290 - 4 c=14405 g=14406 pq=1 qp=1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114 - 5!c=14168 g=14169 pq=1 qp=0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722 - 6 c=14404 g=14405 pq=1 qp=0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811 - 7 c=14407 g=14408 pq=1 qp=1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042 + 0!c=12865 g=12866 pq=1/0 qp=1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871 + 1 c=14407 g=14408 pq=1/0 qp=0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485 + 2 c=14407 g=14408 pq=1/0 qp=0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490 + 3 c=14407 g=14408 pq=1/0 qp=0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290 + 4 c=14405 g=14406 pq=1/0 qp=1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114 + 5!c=14168 g=14169 pq=1/0 qp=0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722 + 6 c=14404 g=14405 pq=1/0 qp=0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811 + 7 c=14407 g=14408 pq=1/0 qp=1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042 This is similar to the output discussed above, but contains the following additional fields: diff --git a/Documentation/devicetree/bindings/mfd/max77686.txt b/Documentation/devicetree/bindings/mfd/max77686.txt index 75fdfaf41831..e39f0bc1f55e 100644 --- a/Documentation/devicetree/bindings/mfd/max77686.txt +++ b/Documentation/devicetree/bindings/mfd/max77686.txt @@ -39,6 +39,12 @@ to get matched with their hardware counterparts as follow: -BUCKn : 1-4. Use standard regulator bindings for it ('regulator-off-in-suspend'). + LDO20, LDO21, LDO22, BUCK8 and BUCK9 can be configured to GPIO enable + control. To turn this feature on this property must be added to the regulator + sub-node: + - maxim,ena-gpios : one GPIO specifier enable control (the gpio + flags are actually ignored and always + ACTIVE_HIGH is used) Example: @@ -65,4 +71,12 @@ Example: regulator-always-on; regulator-boot-on; }; + + buck9_reg { + regulator-compatible = "BUCK9"; + regulator-name = "CAM_ISP_CORE_1.2V"; + regulator-min-microvolt = <1000000>; + regulator-max-microvolt = <1200000>; + maxim,ena-gpios = <&gpm0 3 GPIO_ACTIVE_HIGH>; + }; } diff --git a/Documentation/devicetree/bindings/regulator/da9211.txt b/Documentation/devicetree/bindings/regulator/da9211.txt index 240019a82f9a..eb618907c7de 100644 --- a/Documentation/devicetree/bindings/regulator/da9211.txt +++ b/Documentation/devicetree/bindings/regulator/da9211.txt @@ -11,6 +11,7 @@ Required properties: BUCKA and BUCKB. Optional properties: +- enable-gpios: platform gpio for control of BUCKA/BUCKB. - Any optional property defined in regulator.txt Example 1) DA9211 @@ -27,6 +28,7 @@ Example 1) DA9211 regulator-max-microvolt = <1570000>; regulator-min-microamp = <2000000>; regulator-max-microamp = <5000000>; + enable-gpios = <&gpio 27 0>; }; BUCKB { regulator-name = "VBUCKB"; @@ -34,11 +36,12 @@ Example 1) DA9211 regulator-max-microvolt = <1570000>; regulator-min-microamp = <2000000>; regulator-max-microamp = <5000000>; + enable-gpios = <&gpio 17 0>; }; }; }; -Example 2) DA92113 +Example 2) DA9213 pmic: da9213@68 { compatible = "dlg,da9213"; reg = <0x68>; @@ -51,6 +54,7 @@ Example 2) DA92113 regulator-max-microvolt = <1570000>; regulator-min-microamp = <3000000>; regulator-max-microamp = <6000000>; + enable-gpios = <&gpio 27 0>; }; BUCKB { regulator-name = "VBUCKB"; @@ -58,6 +62,7 @@ Example 2) DA92113 regulator-max-microvolt = <1570000>; regulator-min-microamp = <3000000>; regulator-max-microamp = <6000000>; + enable-gpios = <&gpio 17 0>; }; }; }; diff --git a/Documentation/devicetree/bindings/regulator/isl9305.txt b/Documentation/devicetree/bindings/regulator/isl9305.txt index a626fc1bbf0d..d6e7c9ec9413 100644 --- a/Documentation/devicetree/bindings/regulator/isl9305.txt +++ b/Documentation/devicetree/bindings/regulator/isl9305.txt @@ -2,7 +2,7 @@ Intersil ISL9305/ISL9305H voltage regulator Required properties: -- compatible: "isl,isl9305" or "isl,isl9305h" +- compatible: "isil,isl9305" or "isil,isl9305h" - reg: I2C slave address, usually 0x68. - regulators: A node that houses a sub-node for each regulator within the device. Each sub-node is identified using the node's name, with valid @@ -19,7 +19,7 @@ Optional properties: Example pmic: isl9305@68 { - compatible = "isl,isl9305"; + compatible = "isil,isl9305"; reg = <0x68>; VINDCD1-supply = <&system_power>; diff --git a/Documentation/devicetree/bindings/regulator/mt6397-regulator.txt b/Documentation/devicetree/bindings/regulator/mt6397-regulator.txt new file mode 100644 index 000000000000..a42b1d6e9863 --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/mt6397-regulator.txt @@ -0,0 +1,217 @@ +Mediatek MT6397 Regulator Driver + +Required properties: +- compatible: "mediatek,mt6397-regulator" +- mt6397regulator: List of regulators provided by this controller. It is named + according to its regulator type, buck_<name> and ldo_<name>. + The definition for each of these nodes is defined using the standard binding + for regulators at Documentation/devicetree/bindings/regulator/regulator.txt. + +The valid names for regulators are:: +BUCK: + buck_vpca15, buck_vpca7, buck_vsramca15, buck_vsramca7, buck_vcore, buck_vgpu, + buck_vdrm, buck_vio18 +LDO: + ldo_vtcxo, ldo_va28, ldo_vcama, ldo_vio28, ldo_vusb, ldo_vmc, ldo_vmch, + ldo_vemc3v3, ldo_vgp1, ldo_vgp2, ldo_vgp3, ldo_vgp4, ldo_vgp5, ldo_vgp6, + ldo_vibr + +Example: + pmic { + compatible = "mediatek,mt6397"; + + mt6397regulator: mt6397regulator { + compatible = "mediatek,mt6397-regulator"; + + mt6397_vpca15_reg: buck_vpca15 { + regulator-compatible = "buck_vpca15"; + regulator-name = "vpca15"; + regulator-min-microvolt = < 850000>; + regulator-max-microvolt = <1350000>; + regulator-ramp-delay = <12500>; + regulator-enable-ramp-delay = <200>; + }; + + mt6397_vpca7_reg: buck_vpca7 { + regulator-compatible = "buck_vpca7"; + regulator-name = "vpca7"; + regulator-min-microvolt = < 850000>; + regulator-max-microvolt = <1350000>; + regulator-ramp-delay = <12500>; + regulator-enable-ramp-delay = <115>; + }; + + mt6397_vsramca15_reg: buck_vsramca15 { + regulator-compatible = "buck_vsramca15"; + regulator-name = "vsramca15"; + regulator-min-microvolt = < 850000>; + regulator-max-microvolt = <1350000>; + regulator-ramp-delay = <12500>; + regulator-enable-ramp-delay = <115>; + + }; + + mt6397_vsramca7_reg: buck_vsramca7 { + regulator-compatible = "buck_vsramca7"; + regulator-name = "vsramca7"; + regulator-min-microvolt = < 850000>; + regulator-max-microvolt = <1350000>; + regulator-ramp-delay = <12500>; + regulator-enable-ramp-delay = <115>; + + }; + + mt6397_vcore_reg: buck_vcore { + regulator-compatible = "buck_vcore"; + regulator-name = "vcore"; + regulator-min-microvolt = < 850000>; + regulator-max-microvolt = <1350000>; + regulator-ramp-delay = <12500>; + regulator-enable-ramp-delay = <115>; + }; + + mt6397_vgpu_reg: buck_vgpu { + regulator-compatible = "buck_vgpu"; + regulator-name = "vgpu"; + regulator-min-microvolt = < 700000>; + regulator-max-microvolt = <1350000>; + regulator-ramp-delay = <12500>; + regulator-enable-ramp-delay = <115>; + }; + + mt6397_vdrm_reg: buck_vdrm { + regulator-compatible = "buck_vdrm"; + regulator-name = "vdrm"; + regulator-min-microvolt = < 800000>; + regulator-max-microvolt = <1400000>; + regulator-ramp-delay = <12500>; + regulator-enable-ramp-delay = <500>; + }; + + mt6397_vio18_reg: buck_vio18 { + regulator-compatible = "buck_vio18"; + regulator-name = "vio18"; + regulator-min-microvolt = <1500000>; + regulator-max-microvolt = <2120000>; + regulator-ramp-delay = <12500>; + regulator-enable-ramp-delay = <500>; + }; + + mt6397_vtcxo_reg: ldo_vtcxo { + regulator-compatible = "ldo_vtcxo"; + regulator-name = "vtcxo"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + regulator-enable-ramp-delay = <90>; + }; + + mt6397_va28_reg: ldo_va28 { + regulator-compatible = "ldo_va28"; + regulator-name = "va28"; + /* fixed output 2.8 V */ + regulator-enable-ramp-delay = <218>; + }; + + mt6397_vcama_reg: ldo_vcama { + regulator-compatible = "ldo_vcama"; + regulator-name = "vcama"; + regulator-min-microvolt = <1500000>; + regulator-max-microvolt = <2800000>; + regulator-enable-ramp-delay = <218>; + }; + + mt6397_vio28_reg: ldo_vio28 { + regulator-compatible = "ldo_vio28"; + regulator-name = "vio28"; + /* fixed output 2.8 V */ + regulator-enable-ramp-delay = <240>; + }; + + mt6397_usb_reg: ldo_vusb { + regulator-compatible = "ldo_vusb"; + regulator-name = "vusb"; + /* fixed output 3.3 V */ + regulator-enable-ramp-delay = <218>; + }; + + mt6397_vmc_reg: ldo_vmc { + regulator-compatible = "ldo_vmc"; + regulator-name = "vmc"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <218>; + }; + + mt6397_vmch_reg: ldo_vmch { + regulator-compatible = "ldo_vmch"; + regulator-name = "vmch"; + regulator-min-microvolt = <3000000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <218>; + }; + + mt6397_vemc_3v3_reg: ldo_vemc3v3 { + regulator-compatible = "ldo_vemc3v3"; + regulator-name = "vemc_3v3"; + regulator-min-microvolt = <3000000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <218>; + }; + + mt6397_vgp1_reg: ldo_vgp1 { + regulator-compatible = "ldo_vgp1"; + regulator-name = "vcamd"; + regulator-min-microvolt = <1220000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <240>; + }; + + mt6397_vgp2_reg: ldo_vgp2 { + egulator-compatible = "ldo_vgp2"; + regulator-name = "vcamio"; + regulator-min-microvolt = <1000000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <218>; + }; + + mt6397_vgp3_reg: ldo_vgp3 { + regulator-compatible = "ldo_vgp3"; + regulator-name = "vcamaf"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <218>; + }; + + mt6397_vgp4_reg: ldo_vgp4 { + regulator-compatible = "ldo_vgp4"; + regulator-name = "vgp4"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <218>; + }; + + mt6397_vgp5_reg: ldo_vgp5 { + regulator-compatible = "ldo_vgp5"; + regulator-name = "vgp5"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3000000>; + regulator-enable-ramp-delay = <218>; + }; + + mt6397_vgp6_reg: ldo_vgp6 { + regulator-compatible = "ldo_vgp6"; + regulator-name = "vgp6"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <218>; + }; + + mt6397_vibr_reg: ldo_vibr { + regulator-compatible = "ldo_vibr"; + regulator-name = "vibr"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <218>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/regulator/pfuze100.txt b/Documentation/devicetree/bindings/regulator/pfuze100.txt index 34ef5d16d0f1..9b40db88f637 100644 --- a/Documentation/devicetree/bindings/regulator/pfuze100.txt +++ b/Documentation/devicetree/bindings/regulator/pfuze100.txt @@ -1,7 +1,7 @@ PFUZE100 family of regulators Required properties: -- compatible: "fsl,pfuze100" or "fsl,pfuze200" +- compatible: "fsl,pfuze100", "fsl,pfuze200", "fsl,pfuze3000" - reg: I2C slave address Required child node: @@ -14,6 +14,8 @@ Required child node: sw1ab,sw1c,sw2,sw3a,sw3b,sw4,swbst,vsnvs,vrefddr,vgen1~vgen6 --PFUZE200 sw1ab,sw2,sw3a,sw3b,swbst,vsnvs,vrefddr,vgen1~vgen6 + --PFUZE3000 + sw1a,sw1b,sw2,sw3,swbst,vsnvs,vrefddr,vldo1,vldo2,vccsd,v33,vldo3,vldo4 Each regulator is defined using the standard binding for regulators. @@ -205,3 +207,93 @@ Example 2: PFUZE200 }; }; }; + +Example 3: PFUZE3000 + + pmic: pfuze3000@08 { + compatible = "fsl,pfuze3000"; + reg = <0x08>; + + regulators { + sw1a_reg: sw1a { + regulator-min-microvolt = <700000>; + regulator-max-microvolt = <1475000>; + regulator-boot-on; + regulator-always-on; + regulator-ramp-delay = <6250>; + }; + /* use sw1c_reg to align with pfuze100/pfuze200 */ + sw1c_reg: sw1b { + regulator-min-microvolt = <700000>; + regulator-max-microvolt = <1475000>; + regulator-boot-on; + regulator-always-on; + regulator-ramp-delay = <6250>; + }; + + sw2_reg: sw2 { + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + regulator-always-on; + }; + + sw3a_reg: sw3 { + regulator-min-microvolt = <900000>; + regulator-max-microvolt = <1650000>; + regulator-boot-on; + regulator-always-on; + }; + + swbst_reg: swbst { + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5150000>; + }; + + snvs_reg: vsnvs { + regulator-min-microvolt = <1000000>; + regulator-max-microvolt = <3000000>; + regulator-boot-on; + regulator-always-on; + }; + + vref_reg: vrefddr { + regulator-boot-on; + regulator-always-on; + }; + + vgen1_reg: vldo1 { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + vgen2_reg: vldo2 { + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <1550000>; + }; + + vgen3_reg: vccsd { + regulator-min-microvolt = <2850000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + vgen4_reg: v33 { + regulator-min-microvolt = <2850000>; + regulator-max-microvolt = <3300000>; + }; + + vgen5_reg: vldo3 { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + vgen6_reg: vldo4 { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/spi/sh-msiof.txt b/Documentation/devicetree/bindings/spi/sh-msiof.txt index d11c3721e7cd..4c388bb2f0a2 100644 --- a/Documentation/devicetree/bindings/spi/sh-msiof.txt +++ b/Documentation/devicetree/bindings/spi/sh-msiof.txt @@ -30,6 +30,22 @@ Optional properties: specifiers, one for transmission, and one for reception. - dma-names : Must contain a list of two DMA names, "tx" and "rx". +- renesas,dtdl : delay sync signal (setup) in transmit mode. + Must contain one of the following values: + 0 (no bit delay) + 50 (0.5-clock-cycle delay) + 100 (1-clock-cycle delay) + 150 (1.5-clock-cycle delay) + 200 (2-clock-cycle delay) + +- renesas,syncdl : delay sync signal (hold) in transmit mode. + Must contain one of the following values: + 0 (no bit delay) + 50 (0.5-clock-cycle delay) + 100 (1-clock-cycle delay) + 150 (1.5-clock-cycle delay) + 200 (2-clock-cycle delay) + 300 (3-clock-cycle delay) Optional properties, deprecated for soctype-specific bindings: - renesas,tx-fifo-size : Overrides the default tx fifo size given in words diff --git a/Documentation/devicetree/bindings/spi/spi-sirf.txt b/Documentation/devicetree/bindings/spi/spi-sirf.txt new file mode 100644 index 000000000000..4c7adb8f777c --- /dev/null +++ b/Documentation/devicetree/bindings/spi/spi-sirf.txt @@ -0,0 +1,41 @@ +* CSR SiRFprimaII Serial Peripheral Interface + +Required properties: +- compatible : Should be "sirf,prima2-spi" +- reg : Offset and length of the register set for the device +- interrupts : Should contain SPI interrupt +- resets: phandle to the reset controller asserting this device in + reset + See ../reset/reset.txt for details. +- dmas : Must contain an entry for each entry in clock-names. + See ../dma/dma.txt for details. +- dma-names : Must include the following entries: + - rx + - tx +- clocks : Must contain an entry for each entry in clock-names. + See ../clocks/clock-bindings.txt for details. + +- #address-cells: Number of cells required to define a chip select + address on the SPI bus. Should be set to 1. +- #size-cells: Should be zero. + +Optional properties: +- spi-max-frequency: Specifies maximum SPI clock frequency, + Units - Hz. Definition as per + Documentation/devicetree/bindings/spi/spi-bus.txt +- cs-gpios: should specify GPIOs used for chipselects. + +Example: + +spi0: spi@b00d0000 { + compatible = "sirf,prima2-spi"; + reg = <0xb00d0000 0x10000>; + interrupts = <15>; + dmas = <&dmac1 9>, + <&dmac1 4>; + dma-names = "rx", "tx"; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&clks 19>; + resets = <&rstc 26>; +}; diff --git a/Documentation/devicetree/bindings/spi/spi-st-ssc.txt b/Documentation/devicetree/bindings/spi/spi-st-ssc.txt new file mode 100644 index 000000000000..fe54959ec957 --- /dev/null +++ b/Documentation/devicetree/bindings/spi/spi-st-ssc.txt @@ -0,0 +1,40 @@ +STMicroelectronics SSC (SPI) Controller +--------------------------------------- + +Required properties: +- compatible : "st,comms-ssc4-spi" +- reg : Offset and length of the device's register set +- interrupts : The interrupt specifier +- clock-names : Must contain "ssc" +- clocks : Must contain an entry for each name in clock-names + See ../clk/* +- pinctrl-names : Uses "default", can use "sleep" if provided + See ../pinctrl/pinctrl-binding.txt + +Optional properties: +- cs-gpios : List of GPIO chip selects + See ../spi/spi-bus.txt + +Child nodes represent devices on the SPI bus + See ../spi/spi-bus.txt + +Example: + spi@9840000 { + compatible = "st,comms-ssc4-spi"; + reg = <0x9840000 0x110>; + interrupts = <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk_s_c0_flexgen CLK_EXT2F_A9>; + clock-names = "ssc"; + pinctrl-0 = <&pinctrl_spi0_default>; + pinctrl-names = "default"; + cs-gpios = <&pio17 5 0>; + #address-cells = <1>; + #size-cells = <0>; + + st95hf@0{ + compatible = "st,st95hf"; + reg = <0>; + spi-max-frequency = <1000000>; + interrupts = <2 IRQ_TYPE_EDGE_FALLING>; + }; + }; diff --git a/Documentation/futex-requeue-pi.txt b/Documentation/futex-requeue-pi.txt index 31b16610c416..77b36f59d16b 100644 --- a/Documentation/futex-requeue-pi.txt +++ b/Documentation/futex-requeue-pi.txt @@ -98,7 +98,7 @@ rt_mutex_start_proxy_lock() and rt_mutex_finish_proxy_lock(), which allow the requeue code to acquire an uncontended rt_mutex on behalf of the waiter and to enqueue the waiter on a contended rt_mutex. Two new system calls provide the kernel<->user interface to -requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_REQUEUE_CMP_PI. +requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI. FUTEX_WAIT_REQUEUE_PI is called by the waiter (pthread_cond_wait() and pthread_cond_timedwait()) to block on the initial futex and wait @@ -107,7 +107,7 @@ result of a high-speed collision between futex_wait() and futex_lock_pi(), with some extra logic to check for the additional wake-up scenarios. -FUTEX_REQUEUE_CMP_PI is called by the waker +FUTEX_CMP_REQUEUE_PI is called by the waker (pthread_cond_broadcast() and pthread_cond_signal()) to requeue and possibly wake the waiting tasks. Internally, this system call is still handled by futex_requeue (by passing requeue_pi=1). Before @@ -120,12 +120,12 @@ task as a waiter on the underlying rt_mutex. It is possible that the lock can be acquired at this stage as well, if so, the next waiter is woken to finish the acquisition of the lock. -FUTEX_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but +FUTEX_CMP_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but their sum is all that really matters. futex_requeue() will wake or requeue up to nr_wake + nr_requeue tasks. It will wake only as many tasks as it can acquire the lock for, which in the majority of cases should be 0 as good programming practice dictates that the caller of either pthread_cond_broadcast() or pthread_cond_signal() acquire the -mutex prior to making the call. FUTEX_REQUEUE_PI requires that +mutex prior to making the call. FUTEX_CMP_REQUEUE_PI requires that nr_wake=1. nr_requeue should be INT_MAX for broadcast and 0 for signal. diff --git a/Documentation/hwmon/ina2xx b/Documentation/hwmon/ina2xx index 4223c2d3b508..cfd31d94c872 100644 --- a/Documentation/hwmon/ina2xx +++ b/Documentation/hwmon/ina2xx @@ -26,6 +26,12 @@ Supported chips: Datasheet: Publicly available at the Texas Instruments website http://www.ti.com/ + * Texas Instruments INA231 + Prefix: 'ina231' + Addresses: I2C 0x40 - 0x4f + Datasheet: Publicly available at the Texas Instruments website + http://www.ti.com/ + Author: Lothar Felten <l-felten@ti.com> Description @@ -41,9 +47,18 @@ interface. The INA220 monitors both shunt drop and supply voltage. The INA226 is a current shunt and power monitor with an I2C interface. The INA226 monitors both a shunt voltage drop and bus supply voltage. -The INA230 is a high or low side current shunt and power monitor with an I2C -interface. The INA230 monitors both a shunt voltage drop and bus supply voltage. +INA230 and INA231 are high or low side current shunt and power monitors +with an I2C interface. The chips monitor both a shunt voltage drop and +bus supply voltage. -The shunt value in micro-ohms can be set via platform data or device tree. -Please refer to the Documentation/devicetree/bindings/i2c/ina2xx.txt for bindings +The shunt value in micro-ohms can be set via platform data or device tree at +compile-time or via the shunt_resistor attribute in sysfs at run-time. Please +refer to the Documentation/devicetree/bindings/i2c/ina2xx.txt for bindings if the device tree is used. + +Additionally ina226 supports update_interval attribute as described in +Documentation/hwmon/sysfs-interface. Internally the interval is the sum of +bus and shunt voltage conversion times multiplied by the averaging rate. We +don't touch the conversion times and only modify the number of averages. The +lower limit of the update_interval is 2 ms, the upper limit is 2253 ms. +The actual programmed interval may vary from the desired value. diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt index 5dbc99c04f6e..5001280e9d82 100644 --- a/Documentation/locking/lockdep-design.txt +++ b/Documentation/locking/lockdep-design.txt @@ -34,7 +34,7 @@ The validator tracks lock-class usage history into 4n + 1 separate state bits: - 'ever held with STATE enabled' - 'ever held as readlock with STATE enabled' -Where STATE can be either one of (kernel/lockdep_states.h) +Where STATE can be either one of (kernel/locking/lockdep_states.h) - hardirq - softirq - reclaim_fs diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 70a09f8a0383..ca2387ef27ab 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -269,6 +269,50 @@ And there are a number of things that _must_ or _must_not_ be assumed: STORE *(A + 4) = Y; STORE *A = X; STORE {*A, *(A + 4) } = {X, Y}; +And there are anti-guarantees: + + (*) These guarantees do not apply to bitfields, because compilers often + generate code to modify these using non-atomic read-modify-write + sequences. Do not attempt to use bitfields to synchronize parallel + algorithms. + + (*) Even in cases where bitfields are protected by locks, all fields + in a given bitfield must be protected by one lock. If two fields + in a given bitfield are protected by different locks, the compiler's + non-atomic read-modify-write sequences can cause an update to one + field to corrupt the value of an adjacent field. + + (*) These guarantees apply only to properly aligned and sized scalar + variables. "Properly sized" currently means variables that are + the same size as "char", "short", "int" and "long". "Properly + aligned" means the natural alignment, thus no constraints for + "char", two-byte alignment for "short", four-byte alignment for + "int", and either four-byte or eight-byte alignment for "long", + on 32-bit and 64-bit systems, respectively. Note that these + guarantees were introduced into the C11 standard, so beware when + using older pre-C11 compilers (for example, gcc 4.6). The portion + of the standard containing this guarantee is Section 3.14, which + defines "memory location" as follows: + + memory location + either an object of scalar type, or a maximal sequence + of adjacent bit-fields all having nonzero width + + NOTE 1: Two threads of execution can update and access + separate memory locations without interfering with + each other. + + NOTE 2: A bit-field and an adjacent non-bit-field member + are in separate memory locations. The same applies + to two bit-fields, if one is declared inside a nested + structure declaration and the other is not, or if the two + are separated by a zero-length bit-field declaration, + or if they are separated by a non-bit-field member + declaration. It is not safe to concurrently update two + bit-fields in the same structure if all members declared + between them are also bit-fields, no matter what the + sizes of those intervening bit-fields happen to be. + ========================= WHAT ARE MEMORY BARRIERS? @@ -750,7 +794,7 @@ In summary: However, they do -not- guarantee any other sort of ordering: Not prior loads against later loads, nor prior stores against later anything. If you need these other forms of ordering, - use smb_rmb(), smp_wmb(), or, in the case of prior stores and + use smp_rmb(), smp_wmb(), or, in the case of prior stores and later loads, smp_mb(). (*) If both legs of the "if" statement begin with identical stores diff --git a/Documentation/x86/entry_64.txt b/Documentation/x86/entry_64.txt index 4a1c5c2dc5a9..9132b86176a3 100644 --- a/Documentation/x86/entry_64.txt +++ b/Documentation/x86/entry_64.txt @@ -78,9 +78,6 @@ The expensive (paranoid) way is to read back the MSR_GS_BASE value xorl %ebx,%ebx 1: ret -and the whole paranoid non-paranoid macro complexity is about whether -to suffer that RDMSR cost. - If we are at an interrupt or user-trap/gate-alike boundary then we can use the faster check: the stack will be a reliable indicator of whether SWAPGS was already done: if we see that we are a secondary @@ -93,6 +90,15 @@ which might have triggered right after a normal entry wrote CS to the stack but before we executed SWAPGS, then the only safe way to check for GS is the slower method: the RDMSR. -So we try only to mark those entry methods 'paranoid' that absolutely -need the more expensive check for the GS base - and we generate all -'normal' entry points with the regular (faster) entry macros. +Therefore, super-atomic entries (except NMI, which is handled separately) +must use idtentry with paranoid=1 to handle gsbase correctly. This +triggers three main behavior changes: + + - Interrupt entry will use the slower gsbase check. + - Interrupt entry from user mode will switch off the IST stack. + - Interrupt exit to kernel mode will not attempt to reschedule. + +We try to only use IST entries and the paranoid entry code for vectors +that absolutely need the more expensive check for the GS base - and we +generate all 'normal' entry points with the regular (faster) paranoid=0 +variant. diff --git a/Documentation/x86/x86_64/kernel-stacks b/Documentation/x86/x86_64/kernel-stacks index a01eec5d1d0b..e3c8a49d1a2f 100644 --- a/Documentation/x86/x86_64/kernel-stacks +++ b/Documentation/x86/x86_64/kernel-stacks @@ -40,9 +40,11 @@ An IST is selected by a non-zero value in the IST field of an interrupt-gate descriptor. When an interrupt occurs and the hardware loads such a descriptor, the hardware automatically sets the new stack pointer based on the IST value, then invokes the interrupt handler. If -software wants to allow nested IST interrupts then the handler must -adjust the IST values on entry to and exit from the interrupt handler. -(This is occasionally done, e.g. for debug exceptions.) +the interrupt came from user mode, then the interrupt handler prologue +will switch back to the per-thread stack. If software wants to allow +nested IST interrupts then the handler must adjust the IST values on +entry to and exit from the interrupt handler. (This is occasionally +done, e.g. for debug exceptions.) Events with different IST codes (i.e. with different stacks) can be nested. For example, a debug interrupt can safely be interrupted by an diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt index 199f453cb4de..82fbdbc1e0b0 100644 --- a/Documentation/x86/zero-page.txt +++ b/Documentation/x86/zero-page.txt @@ -3,7 +3,7 @@ protocol of kernel. These should be filled by bootloader or 16-bit real-mode setup code of the kernel. References/settings to it mainly are in: - arch/x86/include/asm/bootparam.h + arch/x86/include/uapi/asm/bootparam.h Offset Proto Name Meaning diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 466bd299b1a8..3afee5f40f4f 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -23,6 +23,7 @@ config KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST + select SRCU depends on ARM_VIRT_EXT && ARM_LPAE ---help--- Support hosting virtualized guest machines. You will also diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c index d27dd982ff26..f5374065ad53 100644 --- a/arch/arm64/kernel/efi-stub.c +++ b/arch/arm64/kernel/efi-stub.c @@ -13,13 +13,13 @@ #include <asm/efi.h> #include <asm/sections.h> -efi_status_t handle_kernel_image(efi_system_table_t *sys_table, - unsigned long *image_addr, - unsigned long *image_size, - unsigned long *reserve_addr, - unsigned long *reserve_size, - unsigned long dram_base, - efi_loaded_image_t *image) +efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table, + unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + unsigned long dram_base, + efi_loaded_image_t *image) { efi_status_t status; unsigned long kernel_size, kernel_memsize = 0; diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 8ba85e9ea388..b334084d3675 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -26,6 +26,7 @@ config KVM select KVM_ARM_HOST select KVM_ARM_VGIC select KVM_ARM_TIMER + select SRCU ---help--- Support hosting virtualized guest machines. diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index 30e334e823bd..2ae12825529f 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig @@ -20,6 +20,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select KVM_MMIO + select SRCU ---help--- Support for hosting Guest kernels. Currently supported on MIPS32 processors. diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index f5769f19ae25..11850f310fb4 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_EVENTFD + select SRCU config KVM_BOOK3S_HANDLER bool diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 646db9c467d1..5fce52cf0e57 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -28,6 +28,7 @@ config KVM select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING + select SRCU ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/tile/kvm/Kconfig b/arch/tile/kvm/Kconfig index 2298cb1daff7..1e968f7550dc 100644 --- a/arch/tile/kvm/Kconfig +++ b/arch/tile/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM depends on HAVE_KVM && MODULES select PREEMPT_NOTIFIERS select ANON_INODES + select SRCU ---help--- Support hosting paravirtualized guest machines. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0dc9d0144a27..5e28e2be3a41 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -138,6 +138,7 @@ config X86 select HAVE_ACPI_APEI_NMI if ACPI select ACPI_LEGACY_TABLES_LOOKUP if ACPI select X86_FEATURE_NAMES if PROC_FS + select SRCU config INSTRUCTION_DECODER def_bool y @@ -855,6 +856,10 @@ config SCHED_MC source "kernel/Kconfig.preempt" +config UP_LATE_INIT + def_bool y + depends on !SMP && X86_LOCAL_APIC + config X86_UP_APIC bool "Local APIC support on uniprocessors" depends on X86_32 && !SMP && !X86_32_NON_STANDARD diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index ad754b4411f7..8bd44e8ee6e2 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -49,6 +49,7 @@ $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ $(objtree)/drivers/firmware/efi/libstub/lib.a +vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o $(obj)/vmlinux: $(vmlinux-objs-y) FORCE $(call if_changed,ld) diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S index 7ff3632806b1..99494dff2113 100644 --- a/arch/x86/boot/compressed/efi_stub_64.S +++ b/arch/x86/boot/compressed/efi_stub_64.S @@ -3,28 +3,3 @@ #include <asm/processor-flags.h> #include "../../platform/efi/efi_stub_64.S" - -#ifdef CONFIG_EFI_MIXED - .code64 - .text -ENTRY(efi64_thunk) - push %rbp - push %rbx - - subq $16, %rsp - leaq efi_exit32(%rip), %rax - movl %eax, 8(%rsp) - leaq efi_gdt64(%rip), %rax - movl %eax, 4(%rsp) - movl %eax, 2(%rax) /* Fixup the gdt base address */ - leaq efi32_boot_gdt(%rip), %rax - movl %eax, (%rsp) - - call __efi64_thunk - - addq $16, %rsp - pop %rbx - pop %rbp - ret -ENDPROC(efi64_thunk) -#endif /* CONFIG_EFI_MIXED */ diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S new file mode 100644 index 000000000000..630384a4c14a --- /dev/null +++ b/arch/x86/boot/compressed/efi_thunk_64.S @@ -0,0 +1,196 @@ +/* + * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming + * + * Early support for invoking 32-bit EFI services from a 64-bit kernel. + * + * Because this thunking occurs before ExitBootServices() we have to + * restore the firmware's 32-bit GDT before we make EFI serivce calls, + * since the firmware's 32-bit IDT is still currently installed and it + * needs to be able to service interrupts. + * + * On the plus side, we don't have to worry about mangling 64-bit + * addresses into 32-bits because we're executing with an identify + * mapped pagetable and haven't transitioned to 64-bit virtual addresses + * yet. + */ + +#include <linux/linkage.h> +#include <asm/msr.h> +#include <asm/page_types.h> +#include <asm/processor-flags.h> +#include <asm/segment.h> + + .code64 + .text +ENTRY(efi64_thunk) + push %rbp + push %rbx + + subq $8, %rsp + leaq efi_exit32(%rip), %rax + movl %eax, 4(%rsp) + leaq efi_gdt64(%rip), %rax + movl %eax, (%rsp) + movl %eax, 2(%rax) /* Fixup the gdt base address */ + + movl %ds, %eax + push %rax + movl %es, %eax + push %rax + movl %ss, %eax + push %rax + + /* + * Convert x86-64 ABI params to i386 ABI + */ + subq $32, %rsp + movl %esi, 0x0(%rsp) + movl %edx, 0x4(%rsp) + movl %ecx, 0x8(%rsp) + movq %r8, %rsi + movl %esi, 0xc(%rsp) + movq %r9, %rsi + movl %esi, 0x10(%rsp) + + sgdt save_gdt(%rip) + + leaq 1f(%rip), %rbx + movq %rbx, func_rt_ptr(%rip) + + /* + * Switch to gdt with 32-bit segments. This is the firmware GDT + * that was installed when the kernel started executing. This + * pointer was saved at the EFI stub entry point in head_64.S. + */ + leaq efi32_boot_gdt(%rip), %rax + lgdt (%rax) + + pushq $__KERNEL_CS + leaq efi_enter32(%rip), %rax + pushq %rax + lretq + +1: addq $32, %rsp + + lgdt save_gdt(%rip) + + pop %rbx + movl %ebx, %ss + pop %rbx + movl %ebx, %es + pop %rbx + movl %ebx, %ds + + /* + * Convert 32-bit status code into 64-bit. + */ + test %rax, %rax + jz 1f + movl %eax, %ecx + andl $0x0fffffff, %ecx + andl $0xf0000000, %eax + shl $32, %rax + or %rcx, %rax +1: + addq $8, %rsp + pop %rbx + pop %rbp + ret +ENDPROC(efi64_thunk) + +ENTRY(efi_exit32) + movq func_rt_ptr(%rip), %rax + push %rax + mov %rdi, %rax + ret +ENDPROC(efi_exit32) + + .code32 +/* + * EFI service pointer must be in %edi. + * + * The stack should represent the 32-bit calling convention. + */ +ENTRY(efi_enter32) + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + + /* Reload pgtables */ + movl %cr3, %eax + movl %eax, %cr3 + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + /* Disable long mode via EFER */ + movl $MSR_EFER, %ecx + rdmsr + btrl $_EFER_LME, %eax + wrmsr + + call *%edi + + /* We must preserve return value */ + movl %eax, %edi + + /* + * Some firmware will return with interrupts enabled. Be sure to + * disable them before we switch GDTs. + */ + cli + + movl 56(%esp), %eax + movl %eax, 2(%eax) + lgdtl (%eax) + + movl %cr4, %eax + btsl $(X86_CR4_PAE_BIT), %eax + movl %eax, %cr4 + + movl %cr3, %eax + movl %eax, %cr3 + + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + xorl %eax, %eax + lldt %ax + + movl 60(%esp), %eax + pushl $__KERNEL_CS + pushl %eax + + /* Enable paging */ + movl %cr0, %eax + btsl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + lret +ENDPROC(efi_enter32) + + .data + .balign 8 + .global efi32_boot_gdt +efi32_boot_gdt: .word 0 + .quad 0 + +save_gdt: .word 0 + .quad 0 +func_rt_ptr: .quad 0 + + .global efi_gdt64 +efi_gdt64: + .word efi_gdt64_end - efi_gdt64 + .long 0 /* Filled out by user */ + .word 0 + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x00af9a000000ffff /* __KERNEL_CS */ + .quad 0x00cf92000000ffff /* __KERNEL_DS */ + .quad 0x0080890000000000 /* TS descriptor */ + .quad 0x0000000000000000 /* TS continued */ +efi_gdt64_end: diff --git a/arch/x86/boot/ctype.h b/arch/x86/boot/ctype.h index 25e13403193c..020f137df7a2 100644 --- a/arch/x86/boot/ctype.h +++ b/arch/x86/boot/ctype.h @@ -1,6 +1,5 @@ -#ifndef BOOT_ISDIGIT_H - -#define BOOT_ISDIGIT_H +#ifndef BOOT_CTYPE_H +#define BOOT_CTYPE_H static inline int isdigit(int ch) { diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c index 5df2869c874b..45a07684bbab 100644 --- a/arch/x86/boot/early_serial_console.c +++ b/arch/x86/boot/early_serial_console.c @@ -2,8 +2,6 @@ #define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */ -#define XMTRDY 0x20 - #define DLAB 0x80 #define TXR 0 /* Transmit register (WRITE) */ @@ -74,8 +72,8 @@ static void parse_earlyprintk(void) static const int bases[] = { 0x3f8, 0x2f8 }; int idx = 0; - if (!strncmp(arg + pos, "ttyS", 4)) - pos += 4; + /* += strlen("ttyS"); */ + pos += 4; if (arg[pos++] == '1') idx = 1; diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 82e8a1d44658..156ebcab4ada 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -179,8 +179,8 @@ sysenter_dispatch: sysexit_from_sys_call: andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) /* clear IF, that popfq doesn't enable interrupts early */ - andl $~0x200,EFLAGS-R11(%rsp) - movl RIP-R11(%rsp),%edx /* User %eip */ + andl $~0x200,EFLAGS-ARGOFFSET(%rsp) + movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */ CFI_REGISTER rip,rdx RESTORE_ARGS 0,24,0,0,0,0 xorq %r8,%r8 diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 465b309af254..efc3b22d896e 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -106,7 +106,14 @@ extern u32 native_safe_apic_wait_icr_idle(void); extern void native_apic_icr_write(u32 low, u32 id); extern u64 native_apic_icr_read(void); -extern int x2apic_mode; +static inline bool apic_is_x2apic_enabled(void) +{ + u64 msr; + + if (rdmsrl_safe(MSR_IA32_APICBASE, &msr)) + return false; + return msr & X2APIC_ENABLE; +} #ifdef CONFIG_X86_X2APIC /* @@ -169,48 +176,23 @@ static inline u64 native_x2apic_icr_read(void) return val; } +extern int x2apic_mode; extern int x2apic_phys; -extern int x2apic_preenabled; -extern void check_x2apic(void); -extern void enable_x2apic(void); +extern void __init check_x2apic(void); +extern void x2apic_setup(void); static inline int x2apic_enabled(void) { - u64 msr; - - if (!cpu_has_x2apic) - return 0; - - rdmsrl(MSR_IA32_APICBASE, msr); - if (msr & X2APIC_ENABLE) - return 1; - return 0; + return cpu_has_x2apic && apic_is_x2apic_enabled(); } #define x2apic_supported() (cpu_has_x2apic) -static inline void x2apic_force_phys(void) -{ - x2apic_phys = 1; -} #else -static inline void disable_x2apic(void) -{ -} -static inline void check_x2apic(void) -{ -} -static inline void enable_x2apic(void) -{ -} -static inline int x2apic_enabled(void) -{ - return 0; -} -static inline void x2apic_force_phys(void) -{ -} +static inline void check_x2apic(void) { } +static inline void x2apic_setup(void) { } +static inline int x2apic_enabled(void) { return 0; } -#define x2apic_preenabled 0 -#define x2apic_supported() 0 +#define x2apic_mode (0) +#define x2apic_supported() (0) #endif extern void enable_IR_x2apic(void); @@ -219,7 +201,6 @@ extern int get_physical_broadcast(void); extern int lapic_get_maxlvt(void); extern void clear_local_APIC(void); -extern void connect_bsp_APIC(void); extern void disconnect_bsp_APIC(int virt_wire_setup); extern void disable_local_APIC(void); extern void lapic_shutdown(void); @@ -227,14 +208,23 @@ extern int verify_local_APIC(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); extern void setup_local_APIC(void); -extern void end_local_APIC_setup(void); -extern void bsp_end_local_APIC_setup(void); extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); extern void setup_boot_APIC_clock(void); extern void setup_secondary_APIC_clock(void); extern int APIC_init_uniprocessor(void); + +#ifdef CONFIG_X86_64 +static inline int apic_force_enable(unsigned long addr) +{ + return -1; +} +#else extern int apic_force_enable(unsigned long addr); +#endif + +extern int apic_bsp_setup(bool upmode); +extern void apic_ap_setup(void); /* * On 32bit this is mach-xxx local diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 76659b67fd11..1f1297b46f83 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -83,7 +83,6 @@ For 32-bit we have the following conventions - kernel is built with #define SS 160 #define ARGOFFSET R11 -#define SWFRAME ORIG_RAX .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0 subq $9*8+\addskip, %rsp diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index aede2c347bde..90a54851aedc 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -174,6 +174,7 @@ #define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ +#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ #define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ /* @@ -388,6 +389,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) #define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT) +#define cpu_has_bpext boot_cpu_has(X86_FEATURE_BPEXT) #if __GNUC__ >= 4 extern void warn_pre_alternatives(void); diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 61fd18b83b6c..12cb66f6d3a5 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -114,5 +114,10 @@ static inline void debug_stack_usage_inc(void) { } static inline void debug_stack_usage_dec(void) { } #endif /* X86_64 */ +#ifdef CONFIG_CPU_SUP_AMD +extern void set_dr_addr_mask(unsigned long mask, int dr); +#else +static inline void set_dr_addr_mask(unsigned long mask, int dr) { } +#endif #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index e97622f57722..0dbc08282291 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -207,7 +207,7 @@ static inline void fpu_fxsave(struct fpu *fpu) if (config_enabled(CONFIG_X86_32)) asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave)); else if (config_enabled(CONFIG_AS_FXSAVEQ)) - asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave)); + asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave)); else { /* Using "rex64; fxsave %0" is broken because, if the memory * operand uses any extended registers for addressing, a second @@ -290,9 +290,11 @@ static inline int fpu_restore_checking(struct fpu *fpu) static inline int restore_fpu_checking(struct task_struct *tsk) { - /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception - is pending. Clear the x87 state here by setting it to fixed - values. "m" is a random variable that should be in L1 */ + /* + * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is + * pending. Clear the x87 state here by setting it to fixed values. + * "m" is a random variable that should be in L1. + */ if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) { asm volatile( "fnclex\n\t" diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index ef1c4d2d41ec..6c98be864a75 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -12,6 +12,7 @@ */ struct arch_hw_breakpoint { unsigned long address; + unsigned long mask; u8 len; u8 type; }; diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index ed8089d69094..6eb6fcb83f63 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -40,8 +40,8 @@ extern void __kernel_fpu_end(void); static inline void kernel_fpu_begin(void) { - WARN_ON_ONCE(!irq_fpu_usable()); preempt_disable(); + WARN_ON_ONCE(!irq_fpu_usable()); __kernel_fpu_begin(); } @@ -51,6 +51,10 @@ static inline void kernel_fpu_end(void) preempt_enable(); } +/* Must be called with preempt disabled */ +extern void kernel_fpu_disable(void); +extern void kernel_fpu_enable(void); + /* * Some instructions like VIA's padlock instructions generate a spurious * DNA fault but don't modify SSE registers. And these instructions diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index bf006cce9418..2f91685fe1cd 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -279,6 +279,11 @@ static inline void disable_ioapic_support(void) { } #define native_ioapic_set_affinity NULL #define native_setup_ioapic_entry NULL #define native_eoi_ioapic_pin NULL + +static inline void setup_IO_APIC(void) { } +static inline void enable_IO_APIC(void) { } +static inline void setup_ioapic_dest(void) { } + #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index b7747c4c2cf2..6224d316c405 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -33,8 +33,6 @@ struct irq_cfg; #ifdef CONFIG_IRQ_REMAP -extern void setup_irq_remapping_ops(void); -extern int irq_remapping_supported(void); extern void set_irq_remapping_broken(void); extern int irq_remapping_prepare(void); extern int irq_remapping_enable(void); @@ -60,8 +58,6 @@ void irq_remap_modify_chip_defaults(struct irq_chip *chip); #else /* CONFIG_IRQ_REMAP */ -static inline void setup_irq_remapping_ops(void) { } -static inline int irq_remapping_supported(void) { return 0; } static inline void set_irq_remapping_broken(void) { } static inline int irq_remapping_prepare(void) { return -ENODEV; } static inline int irq_remapping_enable(void) { return -ENODEV; } diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 51b26e895933..9b3de99dc004 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -190,7 +190,6 @@ enum mcp_flags { void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); int mce_notify_irq(void); -void mce_notify_process(void); DECLARE_PER_CPU(struct mce, injectm); diff --git a/arch/x86/include/asm/pmc_atom.h b/arch/x86/include/asm/pmc_atom.h index fc7a17c05d35..bc0fc0866553 100644 --- a/arch/x86/include/asm/pmc_atom.h +++ b/arch/x86/include/asm/pmc_atom.h @@ -53,6 +53,28 @@ /* Sleep state counter is in units of of 32us */ #define PMC_TMR_SHIFT 5 +/* Power status of power islands */ +#define PMC_PSS 0x98 + +#define PMC_PSS_BIT_GBE BIT(0) +#define PMC_PSS_BIT_SATA BIT(1) +#define PMC_PSS_BIT_HDA BIT(2) +#define PMC_PSS_BIT_SEC BIT(3) +#define PMC_PSS_BIT_PCIE BIT(4) +#define PMC_PSS_BIT_LPSS BIT(5) +#define PMC_PSS_BIT_LPE BIT(6) +#define PMC_PSS_BIT_DFX BIT(7) +#define PMC_PSS_BIT_USH_CTRL BIT(8) +#define PMC_PSS_BIT_USH_SUS BIT(9) +#define PMC_PSS_BIT_USH_VCCS BIT(10) +#define PMC_PSS_BIT_USH_VCCA BIT(11) +#define PMC_PSS_BIT_OTG_CTRL BIT(12) +#define PMC_PSS_BIT_OTG_VCCS BIT(13) +#define PMC_PSS_BIT_OTG_VCCA_CLK BIT(14) +#define PMC_PSS_BIT_OTG_VCCA BIT(15) +#define PMC_PSS_BIT_USB BIT(16) +#define PMC_PSS_BIT_USB_SUS BIT(17) + /* These registers reflect D3 status of functions */ #define PMC_D3_STS_0 0xA0 diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h deleted file mode 100644 index 0da7409f0bec..000000000000 --- a/arch/x86/include/asm/smpboot_hooks.h +++ /dev/null @@ -1,68 +0,0 @@ -/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws - * which needs to alter them. */ - -static inline void smpboot_clear_io_apic_irqs(void) -{ -#ifdef CONFIG_X86_IO_APIC - io_apic_irqs = 0; -#endif -} - -static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) -{ - unsigned long flags; - - spin_lock_irqsave(&rtc_lock, flags); - CMOS_WRITE(0xa, 0xf); - spin_unlock_irqrestore(&rtc_lock, flags); - local_flush_tlb(); - pr_debug("1.\n"); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = - start_eip >> 4; - pr_debug("2.\n"); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = - start_eip & 0xf; - pr_debug("3.\n"); -} - -static inline void smpboot_restore_warm_reset_vector(void) -{ - unsigned long flags; - - /* - * Install writable page 0 entry to set BIOS data area. - */ - local_flush_tlb(); - - /* - * Paranoid: Set warm reset code and vector here back - * to default values. - */ - spin_lock_irqsave(&rtc_lock, flags); - CMOS_WRITE(0, 0xf); - spin_unlock_irqrestore(&rtc_lock, flags); - - *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; -} - -static inline void __init smpboot_setup_io_apic(void) -{ -#ifdef CONFIG_X86_IO_APIC - /* - * Here we can be sure that there is an IO-APIC in the system. Let's - * go and set it up: - */ - if (!skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); - else { - nr_ioapics = 0; - } -#endif -} - -static inline void smpboot_clear_io_apic(void) -{ -#ifdef CONFIG_X86_IO_APIC - nr_ioapics = 0; -#endif -} diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 547e344a6dc6..e82e95abc92b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -75,7 +75,6 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ -#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ @@ -100,7 +99,6 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_NOTSC (1 << TIF_NOTSC) @@ -140,7 +138,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ + (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \ _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE) /* flags to check in __switch_to() */ @@ -170,6 +168,17 @@ static inline struct thread_info *current_thread_info(void) return ti; } +static inline unsigned long current_stack_pointer(void) +{ + unsigned long sp; +#ifdef CONFIG_X86_64 + asm("mov %%rsp,%0" : "=g" (sp)); +#else + asm("mov %%esp,%0" : "=g" (sp)); +#endif + return sp; +} + #else /* !__ASSEMBLY__ */ /* how to get the thread information struct from ASM */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 707adc6549d8..4e49d7dff78e 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_TRAPS_H #define _ASM_X86_TRAPS_H +#include <linux/context_tracking_state.h> #include <linux/kprobes.h> #include <asm/debugreg.h> @@ -110,6 +111,11 @@ asmlinkage void smp_thermal_interrupt(void); asmlinkage void mce_threshold_interrupt(void); #endif +extern enum ctx_state ist_enter(struct pt_regs *regs); +extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state); +extern void ist_begin_non_atomic(struct pt_regs *regs); +extern void ist_end_non_atomic(void); + /* Interrupts/Exceptions */ enum { X86_TRAP_DE = 0, /* 0, Divide-by-zero */ diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index c8aa65d56027..d979e5abae55 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -251,6 +251,10 @@ /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 +#define MSR_F16H_DR1_ADDR_MASK 0xc0011019 +#define MSR_F16H_DR2_ADDR_MASK 0xc001101a +#define MSR_F16H_DR3_ADDR_MASK 0xc001101b +#define MSR_F16H_DR0_ADDR_MASK 0xc0011027 /* Fam 15h MSRs */ #define MSR_F15H_PERF_CTL 0xc0010200 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b9e30daa0881..a18fff361c7f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -653,6 +653,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi, return gsi; } +#ifdef CONFIG_X86_LOCAL_APIC static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, int trigger, int polarity) { @@ -675,6 +676,7 @@ static void acpi_unregister_gsi_ioapic(u32 gsi) mutex_unlock(&acpi_ioapic_lock); #endif } +#endif int (*__acpi_register_gsi)(struct device *dev, u32 gsi, int trigger, int polarity) = acpi_register_gsi_pic; diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index b708738d016e..6a7c23ff21d3 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -135,14 +135,6 @@ static inline void apbt_clear_mapping(void) apbt_virt_address = NULL; } -/* - * APBT timer interrupt enable / disable - */ -static inline int is_apbt_capable(void) -{ - return apbt_virt_address ? 1 : 0; -} - static int __init apbt_clockevent_register(void) { struct sfi_timer_table_entry *mtmr; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 29b5b18afa27..b665d241efad 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -134,9 +134,6 @@ static inline void imcr_apic_to_pic(void) */ static int force_enable_local_apic __initdata; -/* Control whether x2APIC mode is enabled or not */ -static bool nox2apic __initdata; - /* * APIC command line parameters */ @@ -161,33 +158,6 @@ static __init int setup_apicpmtimer(char *s) __setup("apicpmtimer", setup_apicpmtimer); #endif -int x2apic_mode; -#ifdef CONFIG_X86_X2APIC -/* x2apic enabled before OS handover */ -int x2apic_preenabled; -static int x2apic_disabled; -static int __init setup_nox2apic(char *str) -{ - if (x2apic_enabled()) { - int apicid = native_apic_msr_read(APIC_ID); - - if (apicid >= 255) { - pr_warning("Apicid: %08x, cannot enforce nox2apic\n", - apicid); - return 0; - } - - pr_warning("x2apic already enabled. will disable it\n"); - } else - setup_clear_cpu_cap(X86_FEATURE_X2APIC); - - nox2apic = true; - - return 0; -} -early_param("nox2apic", setup_nox2apic); -#endif - unsigned long mp_lapic_addr; int disable_apic; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ @@ -1475,7 +1445,7 @@ void setup_local_APIC(void) #endif } -void end_local_APIC_setup(void) +static void end_local_APIC_setup(void) { lapic_setup_esr(); @@ -1492,116 +1462,184 @@ void end_local_APIC_setup(void) apic_pm_activate(); } -void __init bsp_end_local_APIC_setup(void) +/* + * APIC setup function for application processors. Called from smpboot.c + */ +void apic_ap_setup(void) { + setup_local_APIC(); end_local_APIC_setup(); - - /* - * Now that local APIC setup is completed for BP, configure the fault - * handling for interrupt remapping. - */ - irq_remap_enable_fault_handling(); - } #ifdef CONFIG_X86_X2APIC -/* - * Need to disable xapic and x2apic at the same time and then enable xapic mode - */ -static inline void __disable_x2apic(u64 msr) -{ - wrmsrl(MSR_IA32_APICBASE, - msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); - wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); -} +int x2apic_mode; -static __init void disable_x2apic(void) +enum { + X2APIC_OFF, + X2APIC_ON, + X2APIC_DISABLED, +}; +static int x2apic_state; + +static inline void __x2apic_disable(void) { u64 msr; - if (!cpu_has_x2apic) + if (cpu_has_apic) return; rdmsrl(MSR_IA32_APICBASE, msr); - if (msr & X2APIC_ENABLE) { - u32 x2apic_id = read_apic_id(); - - if (x2apic_id >= 255) - panic("Cannot disable x2apic, id: %08x\n", x2apic_id); + if (!(msr & X2APIC_ENABLE)) + return; + /* Disable xapic and x2apic first and then reenable xapic mode */ + wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); + wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); + printk_once(KERN_INFO "x2apic disabled\n"); +} - pr_info("Disabling x2apic\n"); - __disable_x2apic(msr); +static inline void __x2apic_enable(void) +{ + u64 msr; - if (nox2apic) { - clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC); - setup_clear_cpu_cap(X86_FEATURE_X2APIC); - } + rdmsrl(MSR_IA32_APICBASE, msr); + if (msr & X2APIC_ENABLE) + return; + wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); + printk_once(KERN_INFO "x2apic enabled\n"); +} - x2apic_disabled = 1; - x2apic_mode = 0; +static int __init setup_nox2apic(char *str) +{ + if (x2apic_enabled()) { + int apicid = native_apic_msr_read(APIC_ID); - register_lapic_address(mp_lapic_addr); + if (apicid >= 255) { + pr_warning("Apicid: %08x, cannot enforce nox2apic\n", + apicid); + return 0; + } + pr_warning("x2apic already enabled.\n"); + __x2apic_disable(); } + setup_clear_cpu_cap(X86_FEATURE_X2APIC); + x2apic_state = X2APIC_DISABLED; + x2apic_mode = 0; + return 0; } +early_param("nox2apic", setup_nox2apic); -void check_x2apic(void) +/* Called from cpu_init() to enable x2apic on (secondary) cpus */ +void x2apic_setup(void) { - if (x2apic_enabled()) { - pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); - x2apic_preenabled = x2apic_mode = 1; + /* + * If x2apic is not in ON state, disable it if already enabled + * from BIOS. + */ + if (x2apic_state != X2APIC_ON) { + __x2apic_disable(); + return; } + __x2apic_enable(); } -void enable_x2apic(void) +static __init void x2apic_disable(void) { - u64 msr; + u32 x2apic_id; - rdmsrl(MSR_IA32_APICBASE, msr); - if (x2apic_disabled) { - __disable_x2apic(msr); + if (x2apic_state != X2APIC_ON) + goto out; + + x2apic_id = read_apic_id(); + if (x2apic_id >= 255) + panic("Cannot disable x2apic, id: %08x\n", x2apic_id); + + __x2apic_disable(); + register_lapic_address(mp_lapic_addr); +out: + x2apic_state = X2APIC_DISABLED; + x2apic_mode = 0; +} + +static __init void x2apic_enable(void) +{ + if (x2apic_state != X2APIC_OFF) return; - } - if (!x2apic_mode) + x2apic_mode = 1; + x2apic_state = X2APIC_ON; + __x2apic_enable(); +} + +static __init void try_to_enable_x2apic(int remap_mode) +{ + if (x2apic_state == X2APIC_DISABLED) return; - if (!(msr & X2APIC_ENABLE)) { - printk_once(KERN_INFO "Enabling x2apic\n"); - wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); + if (remap_mode != IRQ_REMAP_X2APIC_MODE) { + /* IR is required if there is APIC ID > 255 even when running + * under KVM + */ + if (max_physical_apicid > 255 || + (IS_ENABLED(CONFIG_HYPERVISOR_GUEST) && + !hypervisor_x2apic_available())) { + pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); + x2apic_disable(); + return; + } + + /* + * without IR all CPUs can be addressed by IOAPIC/MSI + * only in physical mode + */ + x2apic_phys = 1; } + x2apic_enable(); } -#endif /* CONFIG_X86_X2APIC */ -int __init enable_IR(void) +void __init check_x2apic(void) { -#ifdef CONFIG_IRQ_REMAP - if (!irq_remapping_supported()) { - pr_debug("intr-remapping not supported\n"); - return -1; + if (x2apic_enabled()) { + pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n"); + x2apic_mode = 1; + x2apic_state = X2APIC_ON; + } else if (!cpu_has_x2apic) { + x2apic_state = X2APIC_DISABLED; } +} +#else /* CONFIG_X86_X2APIC */ +static int __init validate_x2apic(void) +{ + if (!apic_is_x2apic_enabled()) + return 0; + /* + * Checkme: Can we simply turn off x2apic here instead of panic? + */ + panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n"); +} +early_initcall(validate_x2apic); - if (!x2apic_preenabled && skip_ioapic_setup) { - pr_info("Skipped enabling intr-remap because of skipping " - "io-apic setup\n"); +static inline void try_to_enable_x2apic(int remap_mode) { } +static inline void __x2apic_enable(void) { } +#endif /* !CONFIG_X86_X2APIC */ + +static int __init try_to_enable_IR(void) +{ +#ifdef CONFIG_X86_IO_APIC + if (!x2apic_enabled() && skip_ioapic_setup) { + pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); return -1; } - - return irq_remapping_enable(); #endif - return -1; + return irq_remapping_enable(); } void __init enable_IR_x2apic(void) { unsigned long flags; - int ret, x2apic_enabled = 0; - int hardware_init_ret; - - /* Make sure irq_remap_ops are initialized */ - setup_irq_remapping_ops(); + int ret, ir_stat; - hardware_init_ret = irq_remapping_prepare(); - if (hardware_init_ret && !x2apic_supported()) + ir_stat = irq_remapping_prepare(); + if (ir_stat < 0 && !x2apic_supported()) return; ret = save_ioapic_entries(); @@ -1614,49 +1652,13 @@ void __init enable_IR_x2apic(void) legacy_pic->mask_all(); mask_ioapic_entries(); - if (x2apic_preenabled && nox2apic) - disable_x2apic(); - - if (hardware_init_ret) - ret = -1; - else - ret = enable_IR(); - - if (!x2apic_supported()) - goto skip_x2apic; + /* If irq_remapping_prepare() succeded, try to enable it */ + if (ir_stat >= 0) + ir_stat = try_to_enable_IR(); + /* ir_stat contains the remap mode or an error code */ + try_to_enable_x2apic(ir_stat); - if (ret < 0) { - /* IR is required if there is APIC ID > 255 even when running - * under KVM - */ - if (max_physical_apicid > 255 || - !hypervisor_x2apic_available()) { - if (x2apic_preenabled) - disable_x2apic(); - goto skip_x2apic; - } - /* - * without IR all CPUs can be addressed by IOAPIC/MSI - * only in physical mode - */ - x2apic_force_phys(); - } - - if (ret == IRQ_REMAP_XAPIC_MODE) { - pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n"); - goto skip_x2apic; - } - - x2apic_enabled = 1; - - if (x2apic_supported() && !x2apic_mode) { - x2apic_mode = 1; - enable_x2apic(); - pr_info("Enabled x2apic\n"); - } - -skip_x2apic: - if (ret < 0) /* IR enabling failed */ + if (ir_stat < 0) restore_ioapic_entries(); legacy_pic->restore_mask(); local_irq_restore(flags); @@ -1847,82 +1849,8 @@ void __init register_lapic_address(unsigned long address) } } -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. - */ int apic_version[MAX_LOCAL_APIC]; -int __init APIC_init_uniprocessor(void) -{ - if (disable_apic) { - pr_info("Apic disabled\n"); - return -1; - } -#ifdef CONFIG_X86_64 - if (!cpu_has_apic) { - disable_apic = 1; - pr_info("Apic disabled by BIOS\n"); - return -1; - } -#else - if (!smp_found_config && !cpu_has_apic) - return -1; - - /* - * Complain if the BIOS pretends there is one. - */ - if (!cpu_has_apic && - APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - pr_err("BIOS bug, local APIC 0x%x not detected!...\n", - boot_cpu_physical_apicid); - return -1; - } -#endif - - default_setup_apic_routing(); - - verify_local_APIC(); - connect_bsp_APIC(); - -#ifdef CONFIG_X86_64 - apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); -#else - /* - * Hack: In case of kdump, after a crash, kernel might be booting - * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid - * might be zero if read from MP tables. Get it from LAPIC. - */ -# ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = read_apic_id(); -# endif -#endif - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); - setup_local_APIC(); - -#ifdef CONFIG_X86_IO_APIC - /* - * Now enable IO-APICs, actually call clear_IO_APIC - * We need clear_IO_APIC before enabling error vector - */ - if (!skip_ioapic_setup && nr_ioapics) - enable_IO_APIC(); -#endif - - bsp_end_local_APIC_setup(); - -#ifdef CONFIG_X86_IO_APIC - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); - else { - nr_ioapics = 0; - } -#endif - - x86_init.timers.setup_percpu_clockev(); - return 0; -} - /* * Local APIC interrupts */ @@ -2027,7 +1955,7 @@ __visible void smp_trace_error_interrupt(struct pt_regs *regs) /** * connect_bsp_APIC - attach the APIC to the interrupt system */ -void __init connect_bsp_APIC(void) +static void __init connect_bsp_APIC(void) { #ifdef CONFIG_X86_32 if (pic_mode) { @@ -2274,6 +2202,100 @@ void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) } } +static void __init apic_bsp_up_setup(void) +{ +#ifdef CONFIG_X86_64 + apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); +#else + /* + * Hack: In case of kdump, after a crash, kernel might be booting + * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid + * might be zero if read from MP tables. Get it from LAPIC. + */ +# ifdef CONFIG_CRASH_DUMP + boot_cpu_physical_apicid = read_apic_id(); +# endif +#endif + physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); +} + +/** + * apic_bsp_setup - Setup function for local apic and io-apic + * @upmode: Force UP mode (for APIC_init_uniprocessor) + * + * Returns: + * apic_id of BSP APIC + */ +int __init apic_bsp_setup(bool upmode) +{ + int id; + + connect_bsp_APIC(); + if (upmode) + apic_bsp_up_setup(); + setup_local_APIC(); + + if (x2apic_mode) + id = apic_read(APIC_LDR); + else + id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); + + enable_IO_APIC(); + end_local_APIC_setup(); + irq_remap_enable_fault_handling(); + setup_IO_APIC(); + /* Setup local timer */ + x86_init.timers.setup_percpu_clockev(); + return id; +} + +/* + * This initializes the IO-APIC and APIC hardware if this is + * a UP kernel. + */ +int __init APIC_init_uniprocessor(void) +{ + if (disable_apic) { + pr_info("Apic disabled\n"); + return -1; + } +#ifdef CONFIG_X86_64 + if (!cpu_has_apic) { + disable_apic = 1; + pr_info("Apic disabled by BIOS\n"); + return -1; + } +#else + if (!smp_found_config && !cpu_has_apic) + return -1; + + /* + * Complain if the BIOS pretends there is one. + */ + if (!cpu_has_apic && + APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { + pr_err("BIOS bug, local APIC 0x%x not detected!...\n", + boot_cpu_physical_apicid); + return -1; + } +#endif + + if (!smp_found_config) + disable_ioapic_support(); + + default_setup_apic_routing(); + verify_local_APIC(); + apic_bsp_setup(true); + return 0; +} + +#ifdef CONFIG_UP_LATE_INIT +void __init up_late_init(void) +{ + APIC_init_uniprocessor(); +} +#endif + /* * Power management */ @@ -2359,9 +2381,9 @@ static void lapic_resume(void) mask_ioapic_entries(); legacy_pic->mask_all(); - if (x2apic_mode) - enable_x2apic(); - else { + if (x2apic_mode) { + __x2apic_enable(); + } else { /* * Make sure the APICBASE points to the right address * diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3f5f60406ab1..f4dc2462a1ac 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1507,7 +1507,10 @@ void __init enable_IO_APIC(void) int i8259_apic, i8259_pin; int apic, pin; - if (!nr_legacy_irqs()) + if (skip_ioapic_setup) + nr_ioapics = 0; + + if (!nr_legacy_irqs() || !nr_ioapics) return; for_each_ioapic_pin(apic, pin) { @@ -2295,7 +2298,7 @@ static inline void __init check_timer(void) } local_irq_disable(); apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); - if (x2apic_preenabled) + if (apic_is_x2apic_enabled()) apic_printk(APIC_QUIET, KERN_INFO "Perhaps problem with the pre-enabled x2apic mode\n" "Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); @@ -2373,9 +2376,9 @@ void __init setup_IO_APIC(void) { int ioapic; - /* - * calling enable_IO_APIC() is moved to setup_local_APIC for BP - */ + if (skip_ioapic_setup || !nr_ioapics) + return; + io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 15c5df92f74e..a220239cea65 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -869,3 +869,22 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) return false; } + +void set_dr_addr_mask(unsigned long mask, int dr) +{ + if (!cpu_has_bpext) + return; + + switch (dr) { + case 0: + wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0); + break; + case 1: + case 2: + case 3: + wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0); + break; + default: + break; + } +} diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c6049650c093..b15bffcaba6d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -491,17 +491,18 @@ u16 __read_mostly tlb_lld_2m[NR_INFO]; u16 __read_mostly tlb_lld_4m[NR_INFO]; u16 __read_mostly tlb_lld_1g[NR_INFO]; -void cpu_detect_tlb(struct cpuinfo_x86 *c) +static void cpu_detect_tlb(struct cpuinfo_x86 *c) { if (this_cpu->c_detect_tlb) this_cpu->c_detect_tlb(c); - printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" - "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", + pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n", tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], - tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], - tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], - tlb_lld_1g[ENTRIES]); + tlb_lli_4m[ENTRIES]); + + pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", + tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], + tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); } void detect_ht(struct cpuinfo_x86 *c) @@ -1332,7 +1333,7 @@ void cpu_init(void) barrier(); x86_configure_nx(); - enable_x2apic(); + x2apic_setup(); /* * set up and load the per-CPU TSS diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 9cc6b6f25f42..94d7dcb12145 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -487,10 +487,8 @@ static void init_intel(struct cpuinfo_x86 *c) rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) { - printk_once(KERN_WARNING "ENERGY_PERF_BIAS:" - " Set to 'normal', was 'performance'\n" - "ENERGY_PERF_BIAS: View and update with" - " x86_energy_perf_policy(8)\n"); + pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); + pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d2c611699cd9..cdfed7953963 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -43,6 +43,7 @@ #include <linux/export.h> #include <asm/processor.h> +#include <asm/traps.h> #include <asm/mce.h> #include <asm/msr.h> @@ -115,7 +116,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); * CPU/chipset specific EDAC code can register a notifier call here to print * MCE errors in a human-readable form. */ -ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); +static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); /* Do initial initialization of a struct mce */ void mce_setup(struct mce *m) @@ -311,7 +312,7 @@ static void wait_for_panic(void) panic("Panicing machine check CPU died"); } -static void mce_panic(char *msg, struct mce *final, char *exp) +static void mce_panic(const char *msg, struct mce *final, char *exp) { int i, apei_err = 0; @@ -529,7 +530,7 @@ static void mce_schedule_work(void) schedule_work(this_cpu_ptr(&mce_work)); } -DEFINE_PER_CPU(struct irq_work, mce_irq_work); +static DEFINE_PER_CPU(struct irq_work, mce_irq_work); static void mce_irq_work_cb(struct irq_work *entry) { @@ -735,7 +736,7 @@ static atomic_t mce_callin; /* * Check if a timeout waiting for other CPUs happened. */ -static int mce_timed_out(u64 *t) +static int mce_timed_out(u64 *t, const char *msg) { /* * The others already did panic for some reason. @@ -750,8 +751,7 @@ static int mce_timed_out(u64 *t) goto out; if ((s64)*t < SPINUNIT) { if (mca_cfg.tolerant <= 1) - mce_panic("Timeout synchronizing machine check over CPUs", - NULL, NULL); + mce_panic(msg, NULL, NULL); cpu_missing = 1; return 1; } @@ -867,7 +867,8 @@ static int mce_start(int *no_way_out) * Wait for everyone. */ while (atomic_read(&mce_callin) != cpus) { - if (mce_timed_out(&timeout)) { + if (mce_timed_out(&timeout, + "Timeout: Not all CPUs entered broadcast exception handler")) { atomic_set(&global_nwo, 0); return -1; } @@ -892,7 +893,8 @@ static int mce_start(int *no_way_out) * only seen by one CPU before cleared, avoiding duplicates. */ while (atomic_read(&mce_executing) < order) { - if (mce_timed_out(&timeout)) { + if (mce_timed_out(&timeout, + "Timeout: Subject CPUs unable to finish machine check processing")) { atomic_set(&global_nwo, 0); return -1; } @@ -936,7 +938,8 @@ static int mce_end(int order) * loops. */ while (atomic_read(&mce_executing) <= cpus) { - if (mce_timed_out(&timeout)) + if (mce_timed_out(&timeout, + "Timeout: Monarch CPU unable to finish machine check processing")) goto reset; ndelay(SPINUNIT); } @@ -949,7 +952,8 @@ static int mce_end(int order) * Subject: Wait for Monarch to finish. */ while (atomic_read(&mce_executing) != 0) { - if (mce_timed_out(&timeout)) + if (mce_timed_out(&timeout, + "Timeout: Monarch CPU did not finish machine check processing")) goto reset; ndelay(SPINUNIT); } @@ -1003,51 +1007,6 @@ static void mce_clear_state(unsigned long *toclear) } /* - * Need to save faulting physical address associated with a process - * in the machine check handler some place where we can grab it back - * later in mce_notify_process() - */ -#define MCE_INFO_MAX 16 - -struct mce_info { - atomic_t inuse; - struct task_struct *t; - __u64 paddr; - int restartable; -} mce_info[MCE_INFO_MAX]; - -static void mce_save_info(__u64 addr, int c) -{ - struct mce_info *mi; - - for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) { - if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { - mi->t = current; - mi->paddr = addr; - mi->restartable = c; - return; - } - } - - mce_panic("Too many concurrent recoverable errors", NULL, NULL); -} - -static struct mce_info *mce_find_info(void) -{ - struct mce_info *mi; - - for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) - if (atomic_read(&mi->inuse) && mi->t == current) - return mi; - return NULL; -} - -static void mce_clear_info(struct mce_info *mi) -{ - atomic_set(&mi->inuse, 0); -} - -/* * The actual machine check handler. This only handles real * exceptions when something got corrupted coming in through int 18. * @@ -1063,6 +1022,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) { struct mca_config *cfg = &mca_cfg; struct mce m, *final; + enum ctx_state prev_state; int i; int worst = 0; int severity; @@ -1084,6 +1044,10 @@ void do_machine_check(struct pt_regs *regs, long error_code) DECLARE_BITMAP(toclear, MAX_NR_BANKS); DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); char *msg = "Unknown"; + u64 recover_paddr = ~0ull; + int flags = MF_ACTION_REQUIRED; + + prev_state = ist_enter(regs); this_cpu_inc(mce_exception_count); @@ -1203,9 +1167,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (no_way_out) mce_panic("Fatal machine check on current CPU", &m, msg); if (worst == MCE_AR_SEVERITY) { - /* schedule action before return to userland */ - mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV); - set_thread_flag(TIF_MCE_NOTIFY); + recover_paddr = m.addr; + if (!(m.mcgstatus & MCG_STATUS_RIPV)) + flags |= MF_MUST_KILL; } else if (kill_it) { force_sig(SIGBUS, current); } @@ -1216,6 +1180,27 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); out: sync_core(); + + if (recover_paddr == ~0ull) + goto done; + + pr_err("Uncorrected hardware memory error in user-access at %llx", + recover_paddr); + /* + * We must call memory_failure() here even if the current process is + * doomed. We still need to mark the page as poisoned and alert any + * other users of the page. + */ + ist_begin_non_atomic(regs); + local_irq_enable(); + if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) { + pr_err("Memory error not recovered"); + force_sig(SIGBUS, current); + } + local_irq_disable(); + ist_end_non_atomic(); +done: + ist_exit(regs, prev_state); } EXPORT_SYMBOL_GPL(do_machine_check); @@ -1233,42 +1218,6 @@ int memory_failure(unsigned long pfn, int vector, int flags) #endif /* - * Called in process context that interrupted by MCE and marked with - * TIF_MCE_NOTIFY, just before returning to erroneous userland. - * This code is allowed to sleep. - * Attempt possible recovery such as calling the high level VM handler to - * process any corrupted pages, and kill/signal current process if required. - * Action required errors are handled here. - */ -void mce_notify_process(void) -{ - unsigned long pfn; - struct mce_info *mi = mce_find_info(); - int flags = MF_ACTION_REQUIRED; - - if (!mi) - mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); - pfn = mi->paddr >> PAGE_SHIFT; - - clear_thread_flag(TIF_MCE_NOTIFY); - - pr_err("Uncorrected hardware memory error in user-access at %llx", - mi->paddr); - /* - * We must call memory_failure() here even if the current process is - * doomed. We still need to mark the page as poisoned and alert any - * other users of the page. - */ - if (!mi->restartable) - flags |= MF_MUST_KILL; - if (memory_failure(pfn, MCE_VECTOR, flags) < 0) { - pr_err("Memory error not recovered"); - force_sig(SIGBUS, current); - } - mce_clear_info(mi); -} - -/* * Action optional processing happens here (picking up * from the list of faulting pages that do_machine_check() * placed into the "ring"). diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index a3042989398c..ec2663a708e4 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -8,6 +8,7 @@ #include <linux/smp.h> #include <asm/processor.h> +#include <asm/traps.h> #include <asm/mce.h> #include <asm/msr.h> @@ -17,8 +18,11 @@ int mce_p5_enabled __read_mostly; /* Machine check handler for Pentium class Intel CPUs: */ static void pentium_machine_check(struct pt_regs *regs, long error_code) { + enum ctx_state prev_state; u32 loaddr, hi, lotype; + prev_state = ist_enter(regs); + rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); @@ -33,6 +37,8 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) } add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + + ist_exit(regs, prev_state); } /* Set up machine check reporting for processors with Intel style MCE: */ diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 7dc5564d0cdf..bd5d46a32210 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -7,14 +7,19 @@ #include <linux/types.h> #include <asm/processor.h> +#include <asm/traps.h> #include <asm/mce.h> #include <asm/msr.h> /* Machine check handler for WinChip C6: */ static void winchip_machine_check(struct pt_regs *regs, long error_code) { + enum ctx_state prev_state = ist_enter(regs); + printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + + ist_exit(regs, prev_state); } /* Set up machine check reporting on the Winchip C6 series */ diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index dd2f07ae9d0c..46201deee923 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -184,9 +184,9 @@ void __init e820_print_map(char *who) * overwritten in the same location, starting at biosmap. * * The integer pointed to by pnr_map must be valid on entry (the - * current number of valid entries located at biosmap) and will - * be updated on return, with the new number of valid entries - * (something no more than max_nr_map.) + * current number of valid entries located at biosmap). If the + * sanitizing succeeds the *pnr_map will be updated with the new + * number of valid entries (something no more than max_nr_map). * * The return value from sanitize_e820_map() is zero if it * successfully 'sanitized' the map entries passed in, and is -1 @@ -561,23 +561,15 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, void __init update_e820(void) { - u32 nr_map; - - nr_map = e820.nr_map; - if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map)) return; - e820.nr_map = nr_map; printk(KERN_INFO "e820: modified physical RAM map:\n"); e820_print_map("modified"); } static void __init update_e820_saved(void) { - u32 nr_map; - - nr_map = e820_saved.nr_map; - if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) - return; - e820_saved.nr_map = nr_map; + sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), + &e820_saved.nr_map); } #define MAX_GAP_END 0x100000000ull /* @@ -898,11 +890,9 @@ early_param("memmap", parse_memmap_opt); void __init finish_e820_parsing(void) { if (userdef) { - u32 nr = e820.nr_map; - - if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), + &e820.nr_map) < 0) early_panic("Invalid user supplied memory map"); - e820.nr_map = nr; printk(KERN_INFO "e820: user-defined physical RAM map:\n"); e820_print_map("user"); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 9ebaf63ba182..db13655c3a2a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -143,7 +143,8 @@ ENDPROC(native_usergs_sysret64) movq \tmp,RSP+\offset(%rsp) movq $__USER_DS,SS+\offset(%rsp) movq $__USER_CS,CS+\offset(%rsp) - movq $-1,RCX+\offset(%rsp) + movq RIP+\offset(%rsp),\tmp /* get rip */ + movq \tmp,RCX+\offset(%rsp) /* copy it to rcx as sysret would do */ movq R11+\offset(%rsp),\tmp /* get eflags */ movq \tmp,EFLAGS+\offset(%rsp) .endm @@ -155,27 +156,6 @@ ENDPROC(native_usergs_sysret64) movq \tmp,R11+\offset(%rsp) .endm - .macro FAKE_STACK_FRAME child_rip - /* push in order ss, rsp, eflags, cs, rip */ - xorl %eax, %eax - pushq_cfi $__KERNEL_DS /* ss */ - /*CFI_REL_OFFSET ss,0*/ - pushq_cfi %rax /* rsp */ - CFI_REL_OFFSET rsp,0 - pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */ - /*CFI_REL_OFFSET rflags,0*/ - pushq_cfi $__KERNEL_CS /* cs */ - /*CFI_REL_OFFSET cs,0*/ - pushq_cfi \child_rip /* rip */ - CFI_REL_OFFSET rip,0 - pushq_cfi %rax /* orig rax */ - .endm - - .macro UNFAKE_STACK_FRAME - addq $8*6, %rsp - CFI_ADJUST_CFA_OFFSET -(6*8) - .endm - /* * initial frame state for interrupts (and exceptions without error code) */ @@ -238,51 +218,6 @@ ENDPROC(native_usergs_sysret64) CFI_REL_OFFSET r15, R15+\offset .endm -/* save partial stack frame */ - .macro SAVE_ARGS_IRQ - cld - /* start from rbp in pt_regs and jump over */ - movq_cfi rdi, (RDI-RBP) - movq_cfi rsi, (RSI-RBP) - movq_cfi rdx, (RDX-RBP) - movq_cfi rcx, (RCX-RBP) - movq_cfi rax, (RAX-RBP) - movq_cfi r8, (R8-RBP) - movq_cfi r9, (R9-RBP) - movq_cfi r10, (R10-RBP) - movq_cfi r11, (R11-RBP) - - /* Save rbp so that we can unwind from get_irq_regs() */ - movq_cfi rbp, 0 - - /* Save previous stack value */ - movq %rsp, %rsi - - leaq -RBP(%rsp),%rdi /* arg1 for handler */ - testl $3, CS-RBP(%rsi) - je 1f - SWAPGS - /* - * irq_count is used to check if a CPU is already on an interrupt stack - * or not. While this is essentially redundant with preempt_count it is - * a little cheaper to use a separate counter in the PDA (short of - * moving irq_enter into assembly, which would be too much work) - */ -1: incl PER_CPU_VAR(irq_count) - cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp - CFI_DEF_CFA_REGISTER rsi - - /* Store previous stack value */ - pushq %rsi - CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ - 0x77 /* DW_OP_breg7 */, 0, \ - 0x06 /* DW_OP_deref */, \ - 0x08 /* DW_OP_const1u */, SS+8-RBP, \ - 0x22 /* DW_OP_plus */ - /* We entered an interrupt context - irqs are off: */ - TRACE_IRQS_OFF - .endm - ENTRY(save_paranoid) XCPT_FRAME 1 RDI+8 cld @@ -426,15 +361,12 @@ system_call_fastpath: * Has incomplete stack frame and undefined top of stack. */ ret_from_sys_call: - movl $_TIF_ALLWORK_MASK,%edi - /* edi: flagmask */ -sysret_check: + testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + jnz int_ret_from_sys_call_fixup /* Go the the slow path */ + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx - andl %edi,%edx - jnz sysret_careful CFI_REMEMBER_STATE /* * sysretq will re-enable interrupts: @@ -448,49 +380,10 @@ sysret_check: USERGS_SYSRET64 CFI_RESTORE_STATE - /* Handle reschedules */ - /* edx: work, edi: workmask */ -sysret_careful: - bt $TIF_NEED_RESCHED,%edx - jnc sysret_signal - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - pushq_cfi %rdi - SCHEDULE_USER - popq_cfi %rdi - jmp sysret_check - /* Handle a signal */ -sysret_signal: - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) -#ifdef CONFIG_AUDITSYSCALL - bt $TIF_SYSCALL_AUDIT,%edx - jc sysret_audit -#endif - /* - * We have a signal, or exit tracing or single-step. - * These all wind up with the iret return path anyway, - * so just join that path right now. - */ +int_ret_from_sys_call_fixup: FIXUP_TOP_OF_STACK %r11, -ARGOFFSET - jmp int_check_syscall_exit_work - -#ifdef CONFIG_AUDITSYSCALL - /* - * Return fast path for syscall audit. Call __audit_syscall_exit() - * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT - * masked off. - */ -sysret_audit: - movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ - cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */ - setbe %al /* 1 if so, 0 if not */ - movzbl %al,%edi /* zero-extend that into %edi */ - call __audit_syscall_exit - movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi - jmp sysret_check -#endif /* CONFIG_AUDITSYSCALL */ + jmp int_ret_from_sys_call /* Do syscall tracing */ tracesys: @@ -626,19 +519,6 @@ END(\label) FORK_LIKE vfork FIXED_FRAME stub_iopl, sys_iopl -ENTRY(ptregscall_common) - DEFAULT_FRAME 1 8 /* offset 8: return address */ - RESTORE_TOP_OF_STACK %r11, 8 - movq_cfi_restore R15+8, r15 - movq_cfi_restore R14+8, r14 - movq_cfi_restore R13+8, r13 - movq_cfi_restore R12+8, r12 - movq_cfi_restore RBP+8, rbp - movq_cfi_restore RBX+8, rbx - ret $REST_SKIP /* pop extended registers */ - CFI_ENDPROC -END(ptregscall_common) - ENTRY(stub_execve) CFI_STARTPROC addq $8, %rsp @@ -779,7 +659,48 @@ END(interrupt) /* reserve pt_regs for scratch regs and rbp */ subq $ORIG_RAX-RBP, %rsp CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP - SAVE_ARGS_IRQ + cld + /* start from rbp in pt_regs and jump over */ + movq_cfi rdi, (RDI-RBP) + movq_cfi rsi, (RSI-RBP) + movq_cfi rdx, (RDX-RBP) + movq_cfi rcx, (RCX-RBP) + movq_cfi rax, (RAX-RBP) + movq_cfi r8, (R8-RBP) + movq_cfi r9, (R9-RBP) + movq_cfi r10, (R10-RBP) + movq_cfi r11, (R11-RBP) + + /* Save rbp so that we can unwind from get_irq_regs() */ + movq_cfi rbp, 0 + + /* Save previous stack value */ + movq %rsp, %rsi + + leaq -RBP(%rsp),%rdi /* arg1 for handler */ + testl $3, CS-RBP(%rsi) + je 1f + SWAPGS + /* + * irq_count is used to check if a CPU is already on an interrupt stack + * or not. While this is essentially redundant with preempt_count it is + * a little cheaper to use a separate counter in the PDA (short of + * moving irq_enter into assembly, which would be too much work) + */ +1: incl PER_CPU_VAR(irq_count) + cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp + CFI_DEF_CFA_REGISTER rsi + + /* Store previous stack value */ + pushq %rsi + CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ + 0x77 /* DW_OP_breg7 */, 0, \ + 0x06 /* DW_OP_deref */, \ + 0x08 /* DW_OP_const1u */, SS+8-RBP, \ + 0x22 /* DW_OP_plus */ + /* We entered an interrupt context - irqs are off: */ + TRACE_IRQS_OFF + call \func .endm @@ -831,6 +752,60 @@ retint_swapgs: /* return to user-space */ */ DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_IRETQ + + /* + * Try to use SYSRET instead of IRET if we're returning to + * a completely clean 64-bit userspace context. + */ + movq (RCX-R11)(%rsp), %rcx + cmpq %rcx,(RIP-R11)(%rsp) /* RCX == RIP */ + jne opportunistic_sysret_failed + + /* + * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP + * in kernel space. This essentially lets the user take over + * the kernel, since userspace controls RSP. It's not worth + * testing for canonicalness exactly -- this check detects any + * of the 17 high bits set, which is true for non-canonical + * or kernel addresses. (This will pessimize vsyscall=native. + * Big deal.) + * + * If virtual addresses ever become wider, this will need + * to be updated to remain correct on both old and new CPUs. + */ + .ifne __VIRTUAL_MASK_SHIFT - 47 + .error "virtual address width changed -- sysret checks need update" + .endif + shr $__VIRTUAL_MASK_SHIFT, %rcx + jnz opportunistic_sysret_failed + + cmpq $__USER_CS,(CS-R11)(%rsp) /* CS must match SYSRET */ + jne opportunistic_sysret_failed + + movq (R11-ARGOFFSET)(%rsp), %r11 + cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp) /* R11 == RFLAGS */ + jne opportunistic_sysret_failed + + testq $X86_EFLAGS_RF,%r11 /* sysret can't restore RF */ + jnz opportunistic_sysret_failed + + /* nothing to check for RSP */ + + cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp) /* SS must match SYSRET */ + jne opportunistic_sysret_failed + + /* + * We win! This label is here just for ease of understanding + * perf profiles. Nothing jumps here. + */ +irq_return_via_sysret: + CFI_REMEMBER_STATE + RESTORE_ARGS 1,8,1 + movq (RSP-RIP)(%rsp),%rsp + USERGS_SYSRET64 + CFI_RESTORE_STATE + +opportunistic_sysret_failed: SWAPGS jmp restore_args @@ -1048,6 +1023,11 @@ ENTRY(\sym) CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 .if \paranoid + .if \paranoid == 1 + CFI_REMEMBER_STATE + testl $3, CS(%rsp) /* If coming from userspace, switch */ + jnz 1f /* stacks. */ + .endif call save_paranoid .else call error_entry @@ -1088,6 +1068,36 @@ ENTRY(\sym) jmp error_exit /* %ebx: no swapgs flag */ .endif + .if \paranoid == 1 + CFI_RESTORE_STATE + /* + * Paranoid entry from userspace. Switch stacks and treat it + * as a normal entry. This means that paranoid handlers + * run in real process context if user_mode(regs). + */ +1: + call error_entry + + DEFAULT_FRAME 0 + + movq %rsp,%rdi /* pt_regs pointer */ + call sync_regs + movq %rax,%rsp /* switch stack */ + + movq %rsp,%rdi /* pt_regs pointer */ + + .if \has_error_code + movq ORIG_RAX(%rsp),%rsi /* get error code */ + movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ + .else + xorl %esi,%esi /* no error code */ + .endif + + call \do_sym + + jmp error_exit /* %ebx: no swapgs flag */ + .endif + CFI_ENDPROC END(\sym) .endm @@ -1108,7 +1118,7 @@ idtentry overflow do_overflow has_error_code=0 idtentry bounds do_bounds has_error_code=0 idtentry invalid_op do_invalid_op has_error_code=0 idtentry device_not_available do_device_not_available has_error_code=0 -idtentry double_fault do_double_fault has_error_code=1 paranoid=1 +idtentry double_fault do_double_fault has_error_code=1 paranoid=2 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 idtentry invalid_TSS do_invalid_TSS has_error_code=1 idtentry segment_not_present do_segment_not_present has_error_code=1 @@ -1289,16 +1299,14 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector( #endif /* - * "Paranoid" exit path from exception stack. - * Paranoid because this is used by NMIs and cannot take - * any kernel state for granted. - * We don't do kernel preemption checks here, because only - * NMI should be common and it does not enable IRQs and - * cannot get reschedule ticks. + * "Paranoid" exit path from exception stack. This is invoked + * only on return from non-NMI IST interrupts that came + * from kernel space. * - * "trace" is 0 for the NMI handler only, because irq-tracing - * is fundamentally NMI-unsafe. (we cannot change the soft and - * hard flags at once, atomically) + * We may be returning to very strange contexts (e.g. very early + * in syscall entry), so checking for preemption here would + * be complicated. Fortunately, we there's no good reason + * to try to handle preemption here. */ /* ebx: no swapgs flag */ @@ -1308,43 +1316,14 @@ ENTRY(paranoid_exit) TRACE_IRQS_OFF_DEBUG testl %ebx,%ebx /* swapgs needed? */ jnz paranoid_restore - testl $3,CS(%rsp) - jnz paranoid_userspace -paranoid_swapgs: TRACE_IRQS_IRETQ 0 SWAPGS_UNSAFE_STACK RESTORE_ALL 8 - jmp irq_return + INTERRUPT_RETURN paranoid_restore: TRACE_IRQS_IRETQ_DEBUG 0 RESTORE_ALL 8 - jmp irq_return -paranoid_userspace: - GET_THREAD_INFO(%rcx) - movl TI_flags(%rcx),%ebx - andl $_TIF_WORK_MASK,%ebx - jz paranoid_swapgs - movq %rsp,%rdi /* &pt_regs */ - call sync_regs - movq %rax,%rsp /* switch stack for scheduling */ - testl $_TIF_NEED_RESCHED,%ebx - jnz paranoid_schedule - movl %ebx,%edx /* arg3: thread flags */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - xorl %esi,%esi /* arg2: oldset */ - movq %rsp,%rdi /* arg1: &pt_regs */ - call do_notify_resume - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - jmp paranoid_userspace -paranoid_schedule: - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) - SCHEDULE_USER - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - jmp paranoid_userspace + INTERRUPT_RETURN CFI_ENDPROC END(paranoid_exit) diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 3d5fb509bdeb..7114ba220fd4 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -126,6 +126,8 @@ int arch_install_hw_breakpoint(struct perf_event *bp) *dr7 |= encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); + if (info->mask) + set_dr_addr_mask(info->mask, i); return 0; } @@ -161,29 +163,8 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) *dr7 &= ~__encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); -} - -static int get_hbp_len(u8 hbp_len) -{ - unsigned int len_in_bytes = 0; - - switch (hbp_len) { - case X86_BREAKPOINT_LEN_1: - len_in_bytes = 1; - break; - case X86_BREAKPOINT_LEN_2: - len_in_bytes = 2; - break; - case X86_BREAKPOINT_LEN_4: - len_in_bytes = 4; - break; -#ifdef CONFIG_X86_64 - case X86_BREAKPOINT_LEN_8: - len_in_bytes = 8; - break; -#endif - } - return len_in_bytes; + if (info->mask) + set_dr_addr_mask(0, i); } /* @@ -196,7 +177,7 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) struct arch_hw_breakpoint *info = counter_arch_bp(bp); va = info->address; - len = get_hbp_len(info->len); + len = bp->attr.bp_len; return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); } @@ -277,6 +258,8 @@ static int arch_build_bp_info(struct perf_event *bp) } /* Len */ + info->mask = 0; + switch (bp->attr.bp_len) { case HW_BREAKPOINT_LEN_1: info->len = X86_BREAKPOINT_LEN_1; @@ -293,11 +276,17 @@ static int arch_build_bp_info(struct perf_event *bp) break; #endif default: - return -EINVAL; + if (!is_power_of_2(bp->attr.bp_len)) + return -EINVAL; + if (!cpu_has_bpext) + return -EOPNOTSUPP; + info->mask = bp->attr.bp_len - 1; + info->len = X86_BREAKPOINT_LEN_1; } return 0; } + /* * Validate the arch-specific HW Breakpoint register settings */ @@ -312,11 +301,11 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) if (ret) return ret; - ret = -EINVAL; - switch (info->len) { case X86_BREAKPOINT_LEN_1: align = 0; + if (info->mask) + align = info->mask; break; case X86_BREAKPOINT_LEN_2: align = 1; @@ -330,7 +319,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) break; #endif default: - return ret; + WARN_ON_ONCE(1); } /* diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index a9a4229f6161..81049ffab2d6 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -19,6 +19,19 @@ #include <asm/fpu-internal.h> #include <asm/user.h> +static DEFINE_PER_CPU(bool, in_kernel_fpu); + +void kernel_fpu_disable(void) +{ + WARN_ON(this_cpu_read(in_kernel_fpu)); + this_cpu_write(in_kernel_fpu, true); +} + +void kernel_fpu_enable(void) +{ + this_cpu_write(in_kernel_fpu, false); +} + /* * Were we in an interrupt that interrupted kernel mode? * @@ -33,6 +46,9 @@ */ static inline bool interrupted_kernel_fpu_idle(void) { + if (this_cpu_read(in_kernel_fpu)) + return false; + if (use_eager_fpu()) return __thread_has_fpu(current); @@ -73,10 +89,10 @@ void __kernel_fpu_begin(void) { struct task_struct *me = current; + this_cpu_write(in_kernel_fpu, true); + if (__thread_has_fpu(me)) { - __thread_clear_has_fpu(me); __save_init_fpu(me); - /* We do 'stts()' in __kernel_fpu_end() */ } else if (!use_eager_fpu()) { this_cpu_write(fpu_owner_task, NULL); clts(); @@ -86,19 +102,16 @@ EXPORT_SYMBOL(__kernel_fpu_begin); void __kernel_fpu_end(void) { - if (use_eager_fpu()) { - /* - * For eager fpu, most the time, tsk_used_math() is true. - * Restore the user math as we are done with the kernel usage. - * At few instances during thread exit, signal handling etc, - * tsk_used_math() is false. Those few places will take proper - * actions, so we don't need to restore the math here. - */ - if (likely(tsk_used_math(current))) - math_state_restore(); - } else { + struct task_struct *me = current; + + if (__thread_has_fpu(me)) { + if (WARN_ON(restore_fpu_checking(me))) + drop_init_fpu(me); + } else if (!use_eager_fpu()) { stts(); } + + this_cpu_write(in_kernel_fpu, false); } EXPORT_SYMBOL(__kernel_fpu_end); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 63ce838e5a54..28d28f5eb8f4 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -69,16 +69,9 @@ static void call_on_stack(void *func, void *stack) : "memory", "cc", "edx", "ecx", "eax"); } -/* how to get the current stack pointer from C */ -#define current_stack_pointer ({ \ - unsigned long sp; \ - asm("mov %%esp,%0" : "=g" (sp)); \ - sp; \ -}) - static inline void *current_stack(void) { - return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); + return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); } static inline int @@ -103,7 +96,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) /* Save the next esp at the bottom of the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer; + *prev_esp = current_stack_pointer(); if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -156,7 +149,7 @@ void do_softirq_own_stack(void) /* Push the previous esp onto the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer; + *prev_esp = current_stack_pointer(); call_on_stack(__do_softirq, isp); } diff --git a/arch/x86/kernel/pmc_atom.c b/arch/x86/kernel/pmc_atom.c index 0ee5025e0fa4..d66a4fe6caee 100644 --- a/arch/x86/kernel/pmc_atom.c +++ b/arch/x86/kernel/pmc_atom.c @@ -25,8 +25,6 @@ #include <asm/pmc_atom.h> -#define DRIVER_NAME KBUILD_MODNAME - struct pmc_dev { u32 base_addr; void __iomem *regmap; @@ -38,12 +36,12 @@ struct pmc_dev { static struct pmc_dev pmc_device; static u32 acpi_base_addr; -struct pmc_dev_map { +struct pmc_bit_map { const char *name; u32 bit_mask; }; -static const struct pmc_dev_map dev_map[] = { +static const struct pmc_bit_map dev_map[] = { {"0 - LPSS1_F0_DMA", BIT_LPSS1_F0_DMA}, {"1 - LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1}, {"2 - LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2}, @@ -82,6 +80,27 @@ static const struct pmc_dev_map dev_map[] = { {"35 - DFX", BIT_DFX}, }; +static const struct pmc_bit_map pss_map[] = { + {"0 - GBE", PMC_PSS_BIT_GBE}, + {"1 - SATA", PMC_PSS_BIT_SATA}, + {"2 - HDA", PMC_PSS_BIT_HDA}, + {"3 - SEC", PMC_PSS_BIT_SEC}, + {"4 - PCIE", PMC_PSS_BIT_PCIE}, + {"5 - LPSS", PMC_PSS_BIT_LPSS}, + {"6 - LPE", PMC_PSS_BIT_LPE}, + {"7 - DFX", PMC_PSS_BIT_DFX}, + {"8 - USH_CTRL", PMC_PSS_BIT_USH_CTRL}, + {"9 - USH_SUS", PMC_PSS_BIT_USH_SUS}, + {"10 - USH_VCCS", PMC_PSS_BIT_USH_VCCS}, + {"11 - USH_VCCA", PMC_PSS_BIT_USH_VCCA}, + {"12 - OTG_CTRL", PMC_PSS_BIT_OTG_CTRL}, + {"13 - OTG_VCCS", PMC_PSS_BIT_OTG_VCCS}, + {"14 - OTG_VCCA_CLK", PMC_PSS_BIT_OTG_VCCA_CLK}, + {"15 - OTG_VCCA", PMC_PSS_BIT_OTG_VCCA}, + {"16 - USB", PMC_PSS_BIT_USB}, + {"17 - USB_SUS", PMC_PSS_BIT_USB_SUS}, +}; + static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset) { return readl(pmc->regmap + reg_offset); @@ -169,6 +188,32 @@ static const struct file_operations pmc_dev_state_ops = { .release = single_release, }; +static int pmc_pss_state_show(struct seq_file *s, void *unused) +{ + struct pmc_dev *pmc = s->private; + u32 pss = pmc_reg_read(pmc, PMC_PSS); + int pss_index; + + for (pss_index = 0; pss_index < ARRAY_SIZE(pss_map); pss_index++) { + seq_printf(s, "Island: %-32s\tState: %s\n", + pss_map[pss_index].name, + pss_map[pss_index].bit_mask & pss ? "Off" : "On"); + } + return 0; +} + +static int pmc_pss_state_open(struct inode *inode, struct file *file) +{ + return single_open(file, pmc_pss_state_show, inode->i_private); +} + +static const struct file_operations pmc_pss_state_ops = { + .open = pmc_pss_state_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int pmc_sleep_tmr_show(struct seq_file *s, void *unused) { struct pmc_dev *pmc = s->private; @@ -202,11 +247,7 @@ static const struct file_operations pmc_sleep_tmr_ops = { static void pmc_dbgfs_unregister(struct pmc_dev *pmc) { - if (!pmc->dbgfs_dir) - return; - debugfs_remove_recursive(pmc->dbgfs_dir); - pmc->dbgfs_dir = NULL; } static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) @@ -217,19 +258,29 @@ static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) if (!dir) return -ENOMEM; + pmc->dbgfs_dir = dir; + f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO, dir, pmc, &pmc_dev_state_ops); if (!f) { - dev_err(&pdev->dev, "dev_states register failed\n"); + dev_err(&pdev->dev, "dev_state register failed\n"); goto err; } + + f = debugfs_create_file("pss_state", S_IFREG | S_IRUGO, + dir, pmc, &pmc_pss_state_ops); + if (!f) { + dev_err(&pdev->dev, "pss_state register failed\n"); + goto err; + } + f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO, dir, pmc, &pmc_sleep_tmr_ops); if (!f) { dev_err(&pdev->dev, "sleep_state register failed\n"); goto err; } - pmc->dbgfs_dir = dir; + return 0; err: pmc_dbgfs_unregister(pmc); @@ -292,7 +343,6 @@ MODULE_DEVICE_TABLE(pci, pmc_pci_ids); static int __init pmc_atom_init(void) { - int err = -ENODEV; struct pci_dev *pdev = NULL; const struct pci_device_id *ent; @@ -306,14 +356,11 @@ static int __init pmc_atom_init(void) */ for_each_pci_dev(pdev) { ent = pci_match_id(pmc_pci_ids, pdev); - if (ent) { - err = pmc_setup_dev(pdev); - goto out; - } + if (ent) + return pmc_setup_dev(pdev); } /* Device not found. */ -out: - return err; + return -ENODEV; } module_init(pmc_atom_init); diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index ca9622a25e95..fe3dbfe0c4a5 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -170,7 +170,7 @@ static struct platform_device rtc_device = { static __init int add_rtc_cmos(void) { #ifdef CONFIG_PNP - static const char * const const ids[] __initconst = + static const char * const ids[] __initconst = { "PNP0b00", "PNP0b01", "PNP0b02", }; struct pnp_dev *dev; struct pnp_id *id; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 16b6043cb073..0d8071d7addb 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -438,15 +438,13 @@ static void __init parse_setup_data(void) pa_data = boot_params.hdr.setup_data; while (pa_data) { - u32 data_len, map_len, data_type; + u32 data_len, data_type; - map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK), - (u64)sizeof(struct setup_data)); - data = early_memremap(pa_data, map_len); + data = early_memremap(pa_data, sizeof(*data)); data_len = data->len + sizeof(struct setup_data); data_type = data->type; pa_next = data->next; - early_iounmap(data, map_len); + early_iounmap(data, sizeof(*data)); switch (data_type) { case SETUP_E820_EXT: diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ed37a768d0fc..2a33c8f68319 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -740,12 +740,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { user_exit(); -#ifdef CONFIG_X86_MCE - /* notify userspace of pending MCEs */ - if (thread_info_flags & _TIF_MCE_NOTIFY) - mce_notify_process(); -#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ - if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6d7022c683e3..febc6aabc72e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -73,7 +73,6 @@ #include <asm/setup.h> #include <asm/uv/uv.h> #include <linux/mc146818rtc.h> -#include <asm/smpboot_hooks.h> #include <asm/i8259.h> #include <asm/realmode.h> #include <asm/misc.h> @@ -104,6 +103,43 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); atomic_t init_deasserted; +static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) +{ + unsigned long flags; + + spin_lock_irqsave(&rtc_lock, flags); + CMOS_WRITE(0xa, 0xf); + spin_unlock_irqrestore(&rtc_lock, flags); + local_flush_tlb(); + pr_debug("1.\n"); + *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = + start_eip >> 4; + pr_debug("2.\n"); + *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = + start_eip & 0xf; + pr_debug("3.\n"); +} + +static inline void smpboot_restore_warm_reset_vector(void) +{ + unsigned long flags; + + /* + * Install writable page 0 entry to set BIOS data area. + */ + local_flush_tlb(); + + /* + * Paranoid: Set warm reset code and vector here back + * to default values. + */ + spin_lock_irqsave(&rtc_lock, flags); + CMOS_WRITE(0, 0xf); + spin_unlock_irqrestore(&rtc_lock, flags); + + *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; +} + /* * Report back to the Boot Processor during boot time or to the caller processor * during CPU online. @@ -136,8 +172,7 @@ static void smp_callin(void) * CPU, first the APIC. (this is probably redundant on most * boards) */ - setup_local_APIC(); - end_local_APIC_setup(); + apic_ap_setup(); /* * Need to setup vector mappings before we enable interrupts. @@ -955,9 +990,12 @@ void arch_disable_smp_support(void) */ static __init void disable_smp(void) { + pr_info("SMP disabled\n"); + + disable_ioapic_support(); + init_cpu_present(cpumask_of(0)); init_cpu_possible(cpumask_of(0)); - smpboot_clear_io_apic_irqs(); if (smp_found_config) physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); @@ -967,6 +1005,13 @@ static __init void disable_smp(void) cpumask_set_cpu(0, cpu_core_mask(0)); } +enum { + SMP_OK, + SMP_NO_CONFIG, + SMP_NO_APIC, + SMP_FORCE_UP, +}; + /* * Various sanity checks. */ @@ -1014,10 +1059,7 @@ static int __init smp_sanity_check(unsigned max_cpus) if (!smp_found_config && !acpi_lapic) { preempt_enable(); pr_notice("SMP motherboard not detected\n"); - disable_smp(); - if (APIC_init_uniprocessor()) - pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); - return -1; + return SMP_NO_CONFIG; } /* @@ -1041,9 +1083,7 @@ static int __init smp_sanity_check(unsigned max_cpus) boot_cpu_physical_apicid); pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); } - smpboot_clear_io_apic(); - disable_ioapic_support(); - return -1; + return SMP_NO_APIC; } verify_local_APIC(); @@ -1053,15 +1093,10 @@ static int __init smp_sanity_check(unsigned max_cpus) */ if (!max_cpus) { pr_info("SMP mode deactivated\n"); - smpboot_clear_io_apic(); - - connect_bsp_APIC(); - setup_local_APIC(); - bsp_end_local_APIC_setup(); - return -1; + return SMP_FORCE_UP; } - return 0; + return SMP_OK; } static void __init smp_cpu_index_default(void) @@ -1101,10 +1136,21 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) } set_cpu_sibling_map(0); - if (smp_sanity_check(max_cpus) < 0) { - pr_info("SMP disabled\n"); + switch (smp_sanity_check(max_cpus)) { + case SMP_NO_CONFIG: disable_smp(); + if (APIC_init_uniprocessor()) + pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); return; + case SMP_NO_APIC: + disable_smp(); + return; + case SMP_FORCE_UP: + disable_smp(); + apic_bsp_setup(false); + return; + case SMP_OK: + break; } default_setup_apic_routing(); @@ -1115,33 +1161,10 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) /* Or can we switch back to PIC here? */ } - connect_bsp_APIC(); - - /* - * Switch from PIC to APIC mode. - */ - setup_local_APIC(); - - if (x2apic_mode) - cpu0_logical_apicid = apic_read(APIC_LDR); - else - cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); - - /* - * Enable IO APIC before setting up error vector - */ - if (!skip_ioapic_setup && nr_ioapics) - enable_IO_APIC(); - - bsp_end_local_APIC_setup(); - smpboot_setup_io_apic(); - /* - * Set up local APIC timer on boot CPU. - */ + cpu0_logical_apicid = apic_bsp_setup(false); pr_info("CPU%d: ", 0); print_cpu_info(&cpu_data(0)); - x86_init.timers.setup_percpu_clockev(); if (is_uv_system()) uv_system_init(); @@ -1177,9 +1200,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) nmi_selftest(); impress_friends(); -#ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); -#endif mtrr_aps_init(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 88900e288021..9d2073e2ecc9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -108,6 +108,88 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_count_dec(); } +enum ctx_state ist_enter(struct pt_regs *regs) +{ + enum ctx_state prev_state; + + if (user_mode_vm(regs)) { + /* Other than that, we're just an exception. */ + prev_state = exception_enter(); + } else { + /* + * We might have interrupted pretty much anything. In + * fact, if we're a machine check, we can even interrupt + * NMI processing. We don't want in_nmi() to return true, + * but we need to notify RCU. + */ + rcu_nmi_enter(); + prev_state = IN_KERNEL; /* the value is irrelevant. */ + } + + /* + * We are atomic because we're on the IST stack (or we're on x86_32, + * in which case we still shouldn't schedule). + * + * This must be after exception_enter(), because exception_enter() + * won't do anything if in_interrupt() returns true. + */ + preempt_count_add(HARDIRQ_OFFSET); + + /* This code is a bit fragile. Test it. */ + rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work"); + + return prev_state; +} + +void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) +{ + /* Must be before exception_exit. */ + preempt_count_sub(HARDIRQ_OFFSET); + + if (user_mode_vm(regs)) + return exception_exit(prev_state); + else + rcu_nmi_exit(); +} + +/** + * ist_begin_non_atomic() - begin a non-atomic section in an IST exception + * @regs: regs passed to the IST exception handler + * + * IST exception handlers normally cannot schedule. As a special + * exception, if the exception interrupted userspace code (i.e. + * user_mode_vm(regs) would return true) and the exception was not + * a double fault, it can be safe to schedule. ist_begin_non_atomic() + * begins a non-atomic section within an ist_enter()/ist_exit() region. + * Callers are responsible for enabling interrupts themselves inside + * the non-atomic section, and callers must call is_end_non_atomic() + * before ist_exit(). + */ +void ist_begin_non_atomic(struct pt_regs *regs) +{ + BUG_ON(!user_mode_vm(regs)); + + /* + * Sanity check: we need to be on the normal thread stack. This + * will catch asm bugs and any attempt to use ist_preempt_enable + * from double_fault. + */ + BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack)) + & ~(THREAD_SIZE - 1)) != 0); + + preempt_count_sub(HARDIRQ_OFFSET); +} + +/** + * ist_end_non_atomic() - begin a non-atomic section in an IST exception + * + * Ends a non-atomic section started with ist_begin_non_atomic(). + */ +void ist_end_non_atomic(void) +{ + preempt_count_add(HARDIRQ_OFFSET); +} + static nokprobe_inline int do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, struct pt_regs *regs, long error_code) @@ -251,6 +333,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) * end up promoting it to a doublefault. In that case, modify * the stack to make it look like we just entered the #GP * handler from user space, similar to bad_iret. + * + * No need for ist_enter here because we don't use RCU. */ if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && regs->cs == __KERNEL_CS && @@ -263,12 +347,12 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ regs->ip = (unsigned long)general_protection; regs->sp = (unsigned long)&normal_regs->orig_ax; + return; } #endif - exception_enter(); - /* Return not checked because double check cannot be ignored */ + ist_enter(regs); /* Discard prev_state because we won't return. */ notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); tsk->thread.error_code = error_code; @@ -434,7 +518,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) if (poke_int3_handler(regs)) return; - prev_state = exception_enter(); + prev_state = ist_enter(regs); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) @@ -460,33 +544,20 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); debug_stack_usage_dec(); exit: - exception_exit(prev_state); + ist_exit(regs, prev_state); } NOKPROBE_SYMBOL(do_int3); #ifdef CONFIG_X86_64 /* - * Help handler running on IST stack to switch back to user stack - * for scheduling or signal handling. The actual stack switch is done in - * entry.S + * Help handler running on IST stack to switch off the IST stack if the + * interrupted code was in user mode. The actual stack switch is done in + * entry_64.S */ asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) { - struct pt_regs *regs = eregs; - /* Did already sync */ - if (eregs == (struct pt_regs *)eregs->sp) - ; - /* Exception from user space */ - else if (user_mode(eregs)) - regs = task_pt_regs(current); - /* - * Exception from kernel and interrupts are enabled. Move to - * kernel process stack. - */ - else if (eregs->flags & X86_EFLAGS_IF) - regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); - if (eregs != regs) - *regs = *eregs; + struct pt_regs *regs = task_pt_regs(current); + *regs = *eregs; return regs; } NOKPROBE_SYMBOL(sync_regs); @@ -554,7 +625,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) unsigned long dr6; int si_code; - prev_state = exception_enter(); + prev_state = ist_enter(regs); get_debugreg(dr6, 6); @@ -629,7 +700,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_dec(); exit: - exception_exit(prev_state); + ist_exit(regs, prev_state); } NOKPROBE_SYMBOL(do_debug); @@ -788,18 +859,16 @@ void math_state_restore(void) local_irq_disable(); } + /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */ + kernel_fpu_disable(); __thread_fpu_begin(tsk); - - /* - * Paranoid restore. send a SIGSEGV if we fail to restore the state. - */ if (unlikely(restore_fpu_checking(tsk))) { drop_init_fpu(tsk); force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); - return; + } else { + tsk->thread.fpu_counter++; } - - tsk->thread.fpu_counter++; + kernel_fpu_enable(); } EXPORT_SYMBOL_GPL(math_state_restore); diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index f9d16ff56c6b..7dc7ba577ecd 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -40,6 +40,7 @@ config KVM select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_VFIO + select SRCU ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 079c3b6a3ff1..7ff24240d863 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -238,6 +238,31 @@ static void __init_refok adjust_range_page_size_mask(struct map_range *mr, } } +static const char *page_size_string(struct map_range *mr) +{ + static const char str_1g[] = "1G"; + static const char str_2m[] = "2M"; + static const char str_4m[] = "4M"; + static const char str_4k[] = "4k"; + + if (mr->page_size_mask & (1<<PG_LEVEL_1G)) + return str_1g; + /* + * 32-bit without PAE has a 4M large page size. + * PG_LEVEL_2M is misnamed, but we can at least + * print out the right size in the string. + */ + if (IS_ENABLED(CONFIG_X86_32) && + !IS_ENABLED(CONFIG_X86_PAE) && + mr->page_size_mask & (1<<PG_LEVEL_2M)) + return str_4m; + + if (mr->page_size_mask & (1<<PG_LEVEL_2M)) + return str_2m; + + return str_4k; +} + static int __meminit split_mem_range(struct map_range *mr, int nr_range, unsigned long start, unsigned long end) @@ -333,8 +358,7 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, for (i = 0; i < nr_range; i++) printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", mr[i].start, mr[i].end - 1, - (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( - (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); + page_size_string(&mr[i])); return nr_range; } diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 44b9271580b5..852aa4c92da0 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -293,7 +293,6 @@ static void mrst_power_off_unused_dev(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0812, mrst_power_off_unused_dev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev); /* diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 326198a4434e..676e5e04e4d4 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -610,6 +610,32 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) return 0; } +#ifdef CONFIG_ACPI_APEI +extern int (*arch_apei_filter_addr)(int (*func)(__u64 start, __u64 size, + void *data), void *data); + +static int pci_mmcfg_for_each_region(int (*func)(__u64 start, __u64 size, + void *data), void *data) +{ + struct pci_mmcfg_region *cfg; + int rc; + + if (list_empty(&pci_mmcfg_list)) + return 0; + + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + rc = func(cfg->res.start, resource_size(&cfg->res), data); + if (rc) + return rc; + } + + return 0; +} +#define set_apei_filter() (arch_apei_filter_addr = pci_mmcfg_for_each_region) +#else +#define set_apei_filter() +#endif + static void __init __pci_mmcfg_init(int early) { pci_mmcfg_reject_broken(early); @@ -644,6 +670,8 @@ void __init pci_mmcfg_early_init(void) else acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); __pci_mmcfg_init(1); + + set_apei_filter(); } } diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 5fcda7272550..86d0f9e08dd9 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -91,167 +91,6 @@ ENTRY(efi_call) ret ENDPROC(efi_call) -#ifdef CONFIG_EFI_MIXED - -/* - * We run this function from the 1:1 mapping. - * - * This function must be invoked with a 1:1 mapped stack. - */ -ENTRY(__efi64_thunk) - movl %ds, %eax - push %rax - movl %es, %eax - push %rax - movl %ss, %eax - push %rax - - subq $32, %rsp - movl %esi, 0x0(%rsp) - movl %edx, 0x4(%rsp) - movl %ecx, 0x8(%rsp) - movq %r8, %rsi - movl %esi, 0xc(%rsp) - movq %r9, %rsi - movl %esi, 0x10(%rsp) - - sgdt save_gdt(%rip) - - leaq 1f(%rip), %rbx - movq %rbx, func_rt_ptr(%rip) - - /* Switch to gdt with 32-bit segments */ - movl 64(%rsp), %eax - lgdt (%rax) - - leaq efi_enter32(%rip), %rax - pushq $__KERNEL_CS - pushq %rax - lretq - -1: addq $32, %rsp - - lgdt save_gdt(%rip) - - pop %rbx - movl %ebx, %ss - pop %rbx - movl %ebx, %es - pop %rbx - movl %ebx, %ds - - /* - * Convert 32-bit status code into 64-bit. - */ - test %rax, %rax - jz 1f - movl %eax, %ecx - andl $0x0fffffff, %ecx - andl $0xf0000000, %eax - shl $32, %rax - or %rcx, %rax -1: - ret -ENDPROC(__efi64_thunk) - -ENTRY(efi_exit32) - movq func_rt_ptr(%rip), %rax - push %rax - mov %rdi, %rax - ret -ENDPROC(efi_exit32) - - .code32 -/* - * EFI service pointer must be in %edi. - * - * The stack should represent the 32-bit calling convention. - */ -ENTRY(efi_enter32) - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %es - movl %eax, %ss - - /* Reload pgtables */ - movl %cr3, %eax - movl %eax, %cr3 - - /* Disable paging */ - movl %cr0, %eax - btrl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - - /* Disable long mode via EFER */ - movl $MSR_EFER, %ecx - rdmsr - btrl $_EFER_LME, %eax - wrmsr - - call *%edi - - /* We must preserve return value */ - movl %eax, %edi - - /* - * Some firmware will return with interrupts enabled. Be sure to - * disable them before we switch GDTs. - */ - cli - - movl 68(%esp), %eax - movl %eax, 2(%eax) - lgdtl (%eax) - - movl %cr4, %eax - btsl $(X86_CR4_PAE_BIT), %eax - movl %eax, %cr4 - - movl %cr3, %eax - movl %eax, %cr3 - - movl $MSR_EFER, %ecx - rdmsr - btsl $_EFER_LME, %eax - wrmsr - - xorl %eax, %eax - lldt %ax - - movl 72(%esp), %eax - pushl $__KERNEL_CS - pushl %eax - - /* Enable paging */ - movl %cr0, %eax - btsl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - lret -ENDPROC(efi_enter32) - - .data - .balign 8 - .global efi32_boot_gdt -efi32_boot_gdt: .word 0 - .quad 0 - -save_gdt: .word 0 - .quad 0 -func_rt_ptr: .quad 0 - - .global efi_gdt64 -efi_gdt64: - .word efi_gdt64_end - efi_gdt64 - .long 0 /* Filled out by user */ - .word 0 - .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x00af9a000000ffff /* __KERNEL_CS */ - .quad 0x00cf92000000ffff /* __KERNEL_DS */ - .quad 0x0080890000000000 /* TS descriptor */ - .quad 0x0000000000000000 /* TS continued */ -efi_gdt64_end: -#endif /* CONFIG_EFI_MIXED */ - .data ENTRY(efi_scratch) .fill 3,8,0 diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 8806fa73e6e6..ff85d28c50f2 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -1,9 +1,26 @@ /* * Copyright (C) 2014 Intel Corporation; author Matt Fleming + * + * Support for invoking 32-bit EFI runtime services from a 64-bit + * kernel. + * + * The below thunking functions are only used after ExitBootServices() + * has been called. This simplifies things considerably as compared with + * the early EFI thunking because we can leave all the kernel state + * intact (GDT, IDT, etc) and simply invoke the the 32-bit EFI runtime + * services from __KERNEL32_CS. This means we can continue to service + * interrupts across an EFI mixed mode call. + * + * We do however, need to handle the fact that we're running in a full + * 64-bit virtual address space. Things like the stack and instruction + * addresses need to be accessible by the 32-bit firmware, so we rely on + * using the identity mappings in the EFI page table to access the stack + * and kernel text (see efi_setup_page_tables()). */ #include <linux/linkage.h> #include <asm/page_types.h> +#include <asm/segment.h> .text .code64 @@ -33,14 +50,6 @@ ENTRY(efi64_thunk) leaq efi_exit32(%rip), %rbx subq %rax, %rbx movl %ebx, 8(%rsp) - leaq efi_gdt64(%rip), %rbx - subq %rax, %rbx - movl %ebx, 2(%ebx) - movl %ebx, 4(%rsp) - leaq efi_gdt32(%rip), %rbx - subq %rax, %rbx - movl %ebx, 2(%ebx) - movl %ebx, (%rsp) leaq __efi64_thunk(%rip), %rbx subq %rax, %rbx @@ -52,14 +61,92 @@ ENTRY(efi64_thunk) retq ENDPROC(efi64_thunk) - .data -efi_gdt32: - .word efi_gdt32_end - efi_gdt32 - .long 0 /* Filled out above */ - .word 0 - .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x00cf9a000000ffff /* __KERNEL_CS */ - .quad 0x00cf93000000ffff /* __KERNEL_DS */ -efi_gdt32_end: +/* + * We run this function from the 1:1 mapping. + * + * This function must be invoked with a 1:1 mapped stack. + */ +ENTRY(__efi64_thunk) + movl %ds, %eax + push %rax + movl %es, %eax + push %rax + movl %ss, %eax + push %rax + + subq $32, %rsp + movl %esi, 0x0(%rsp) + movl %edx, 0x4(%rsp) + movl %ecx, 0x8(%rsp) + movq %r8, %rsi + movl %esi, 0xc(%rsp) + movq %r9, %rsi + movl %esi, 0x10(%rsp) + + leaq 1f(%rip), %rbx + movq %rbx, func_rt_ptr(%rip) + + /* Switch to 32-bit descriptor */ + pushq $__KERNEL32_CS + leaq efi_enter32(%rip), %rax + pushq %rax + lretq + +1: addq $32, %rsp + + pop %rbx + movl %ebx, %ss + pop %rbx + movl %ebx, %es + pop %rbx + movl %ebx, %ds + /* + * Convert 32-bit status code into 64-bit. + */ + test %rax, %rax + jz 1f + movl %eax, %ecx + andl $0x0fffffff, %ecx + andl $0xf0000000, %eax + shl $32, %rax + or %rcx, %rax +1: + ret +ENDPROC(__efi64_thunk) + +ENTRY(efi_exit32) + movq func_rt_ptr(%rip), %rax + push %rax + mov %rdi, %rax + ret +ENDPROC(efi_exit32) + + .code32 +/* + * EFI service pointer must be in %edi. + * + * The stack should represent the 32-bit calling convention. + */ +ENTRY(efi_enter32) + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + + call *%edi + + /* We must preserve return value */ + movl %eax, %edi + + movl 72(%esp), %eax + pushl $__KERNEL_CS + pushl %eax + + lret +ENDPROC(efi_enter32) + + .data + .balign 8 +func_rt_ptr: .quad 0 efi_saved_sp: .quad 0 diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 5a4affe025e8..09297c8e1fcd 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -205,4 +205,4 @@ $(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE PHONY += vdso_install $(vdso_img_insttargets) vdso_install: $(vdso_img_insttargets) FORCE -clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* +clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* diff --git a/block/partitions/efi.c b/block/partitions/efi.c index 56d08fd75b1a..26cb624ace05 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -715,7 +715,7 @@ int efi_partition(struct parsed_partitions *state) state->parts[i + 1].flags = ADDPART_FLAG_RAID; info = &state->parts[i + 1].info; - efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid); + efi_guid_to_str(&ptes[i].unique_partition_guid, info->uuid); /* Naively convert UTF16-LE to 7 bits. */ label_max = min(ARRAY_SIZE(info->volname) - 1, diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 2cd7bdd6c8b3..a85ac07f3da3 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -449,7 +449,7 @@ int apei_resources_sub(struct apei_resources *resources1, } EXPORT_SYMBOL_GPL(apei_resources_sub); -static int apei_get_nvs_callback(__u64 start, __u64 size, void *data) +static int apei_get_res_callback(__u64 start, __u64 size, void *data) { struct apei_resources *resources = data; return apei_res_add(&resources->iomem, start, size); @@ -457,7 +457,15 @@ static int apei_get_nvs_callback(__u64 start, __u64 size, void *data) static int apei_get_nvs_resources(struct apei_resources *resources) { - return acpi_nvs_for_each_region(apei_get_nvs_callback, resources); + return acpi_nvs_for_each_region(apei_get_res_callback, resources); +} + +int (*arch_apei_filter_addr)(int (*func)(__u64 start, __u64 size, + void *data), void *data); +static int apei_get_arch_resources(struct apei_resources *resources) + +{ + return arch_apei_filter_addr(apei_get_res_callback, resources); } /* @@ -470,7 +478,7 @@ int apei_resources_request(struct apei_resources *resources, { struct apei_res *res, *res_bak = NULL; struct resource *r; - struct apei_resources nvs_resources; + struct apei_resources nvs_resources, arch_res; int rc; rc = apei_resources_sub(resources, &apei_resources_all); @@ -485,10 +493,20 @@ int apei_resources_request(struct apei_resources *resources, apei_resources_init(&nvs_resources); rc = apei_get_nvs_resources(&nvs_resources); if (rc) - goto res_fini; + goto nvs_res_fini; rc = apei_resources_sub(resources, &nvs_resources); if (rc) - goto res_fini; + goto nvs_res_fini; + + if (arch_apei_filter_addr) { + apei_resources_init(&arch_res); + rc = apei_get_arch_resources(&arch_res); + if (rc) + goto arch_res_fini; + rc = apei_resources_sub(resources, &arch_res); + if (rc) + goto arch_res_fini; + } rc = -EINVAL; list_for_each_entry(res, &resources->iomem, list) { @@ -536,7 +554,9 @@ err_unmap_iomem: break; release_mem_region(res->start, res->end - res->start); } -res_fini: +arch_res_fini: + apei_resources_fini(&arch_res); +nvs_res_fini: apei_resources_fini(&nvs_resources); return rc; } diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index 0da5865df5b1..beb8b27d4621 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -51,9 +51,11 @@ struct regmap_async { struct regmap { union { struct mutex mutex; - spinlock_t spinlock; + struct { + spinlock_t spinlock; + unsigned long spinlock_flags; + }; }; - unsigned long spinlock_flags; regmap_lock lock; regmap_unlock unlock; void *lock_arg; /* This is passed to lock/unlock functions */ @@ -233,6 +235,10 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, void regmap_async_complete_cb(struct regmap_async *async, int ret); +enum regmap_endian regmap_get_val_endian(struct device *dev, + const struct regmap_bus *bus, + const struct regmap_config *config); + extern struct regcache_ops regcache_rbtree_ops; extern struct regcache_ops regcache_lzo_ops; extern struct regcache_ops regcache_flat_ops; diff --git a/drivers/base/regmap/regmap-ac97.c b/drivers/base/regmap/regmap-ac97.c index e4c45d2299c1..8d304e2a943d 100644 --- a/drivers/base/regmap/regmap-ac97.c +++ b/drivers/base/regmap/regmap-ac97.c @@ -74,8 +74,8 @@ static int regmap_ac97_reg_write(void *context, unsigned int reg, } static const struct regmap_bus ac97_regmap_bus = { - .reg_write = regmap_ac97_reg_write, - .reg_read = regmap_ac97_reg_read, + .reg_write = regmap_ac97_reg_write, + .reg_read = regmap_ac97_reg_read, }; /** diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c index 053150a7f9f2..4b76e33110a2 100644 --- a/drivers/base/regmap/regmap-i2c.c +++ b/drivers/base/regmap/regmap-i2c.c @@ -14,6 +14,7 @@ #include <linux/i2c.h> #include <linux/module.h> +#include "internal.h" static int regmap_smbus_byte_reg_read(void *context, unsigned int reg, unsigned int *val) @@ -87,6 +88,42 @@ static struct regmap_bus regmap_smbus_word = { .reg_read = regmap_smbus_word_reg_read, }; +static int regmap_smbus_word_read_swapped(void *context, unsigned int reg, + unsigned int *val) +{ + struct device *dev = context; + struct i2c_client *i2c = to_i2c_client(dev); + int ret; + + if (reg > 0xff) + return -EINVAL; + + ret = i2c_smbus_read_word_swapped(i2c, reg); + if (ret < 0) + return ret; + + *val = ret; + + return 0; +} + +static int regmap_smbus_word_write_swapped(void *context, unsigned int reg, + unsigned int val) +{ + struct device *dev = context; + struct i2c_client *i2c = to_i2c_client(dev); + + if (val > 0xffff || reg > 0xff) + return -EINVAL; + + return i2c_smbus_write_word_swapped(i2c, reg, val); +} + +static struct regmap_bus regmap_smbus_word_swapped = { + .reg_write = regmap_smbus_word_write_swapped, + .reg_read = regmap_smbus_word_read_swapped, +}; + static int regmap_i2c_write(void *context, const void *data, size_t count) { struct device *dev = context; @@ -180,7 +217,14 @@ static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c, else if (config->val_bits == 16 && config->reg_bits == 8 && i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_WORD_DATA)) - return ®map_smbus_word; + switch (regmap_get_val_endian(&i2c->dev, NULL, config)) { + case REGMAP_ENDIAN_LITTLE: + return ®map_smbus_word; + case REGMAP_ENDIAN_BIG: + return ®map_smbus_word_swapped; + default: /* everything else is not supported */ + break; + } else if (config->val_bits == 8 && config->reg_bits == 8 && i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index d2f8a818d200..f99b098ddabf 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -473,9 +473,9 @@ static enum regmap_endian regmap_get_reg_endian(const struct regmap_bus *bus, return REGMAP_ENDIAN_BIG; } -static enum regmap_endian regmap_get_val_endian(struct device *dev, - const struct regmap_bus *bus, - const struct regmap_config *config) +enum regmap_endian regmap_get_val_endian(struct device *dev, + const struct regmap_bus *bus, + const struct regmap_config *config) { struct device_node *np; enum regmap_endian endian; @@ -513,6 +513,7 @@ static enum regmap_endian regmap_get_val_endian(struct device *dev, /* Use this if no other value was found */ return REGMAP_ENDIAN_BIG; } +EXPORT_SYMBOL_GPL(regmap_get_val_endian); /** * regmap_init(): Initialise register map diff --git a/drivers/char/random.c b/drivers/char/random.c index 04645c09fe5e..9cd6968e2f92 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -569,19 +569,19 @@ static void fast_mix(struct fast_pool *f) __u32 c = f->pool[2], d = f->pool[3]; a += b; c += d; - b = rol32(a, 6); d = rol32(c, 27); + b = rol32(b, 6); d = rol32(d, 27); d ^= a; b ^= c; a += b; c += d; - b = rol32(a, 16); d = rol32(c, 14); + b = rol32(b, 16); d = rol32(d, 14); d ^= a; b ^= c; a += b; c += d; - b = rol32(a, 6); d = rol32(c, 27); + b = rol32(b, 6); d = rol32(d, 27); d ^= a; b ^= c; a += b; c += d; - b = rol32(a, 16); d = rol32(c, 14); + b = rol32(b, 16); d = rol32(d, 14); d ^= a; b ^= c; f->pool[0] = a; f->pool[1] = b; diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 3f44f292d066..91f86131bb7a 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -13,6 +13,7 @@ config COMMON_CLK bool select HAVE_CLK_PREPARE select CLKDEV_LOOKUP + select SRCU ---help--- The common clock framework is a single definition of struct clk, useful across many platforms, as well as an diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 29b2ef5a68b9..a171fef2c2b6 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -2,6 +2,7 @@ menu "CPU Frequency scaling" config CPU_FREQ bool "CPU Frequency scaling" + select SRCU help CPU Frequency scaling allows you to change the clock speed of CPUs on the fly. This is a nice method to save power, because diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index faf4e70c42e0..3891f6781298 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -1,5 +1,6 @@ menuconfig PM_DEVFREQ bool "Generic Dynamic Voltage and Frequency Scaling (DVFS) support" + select SRCU help A device may have a list of frequencies and voltages available. devfreq, a generic DVFS framework can be registered for a device diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index f712d47f30d8..8de4da5c9ab6 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -12,11 +12,11 @@ config EFI_VARS Note that using this driver in concert with efibootmgr requires at least test release version 0.5.0-test3 or later, which is - available from Matt Domsch's website located at: + available from: <http://linux.dell.com/efibootmgr/testing/efibootmgr-0.5.0-test3.tar.gz> Subsequent efibootmgr releases may be found at: - <http://linux.dell.com/efibootmgr> + <http://github.com/vathpela/efibootmgr> config EFI_VARS_PSTORE tristate "Register efivars backend for pstore" diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 9035c1b74d58..fccb464928c3 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -115,15 +115,24 @@ EFI_ATTR_SHOW(fw_vendor); EFI_ATTR_SHOW(runtime); EFI_ATTR_SHOW(config_table); +static ssize_t fw_platform_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", efi_enabled(EFI_64BIT) ? 64 : 32); +} + static struct kobj_attribute efi_attr_fw_vendor = __ATTR_RO(fw_vendor); static struct kobj_attribute efi_attr_runtime = __ATTR_RO(runtime); static struct kobj_attribute efi_attr_config_table = __ATTR_RO(config_table); +static struct kobj_attribute efi_attr_fw_platform_size = + __ATTR_RO(fw_platform_size); static struct attribute *efi_subsys_attrs[] = { &efi_attr_systab.attr, &efi_attr_fw_vendor.attr, &efi_attr_runtime.attr, &efi_attr_config_table.attr, + &efi_attr_fw_platform_size.attr, NULL, }; @@ -272,15 +281,10 @@ static __init int match_config_table(efi_guid_t *guid, unsigned long table, efi_config_table_type_t *table_types) { - u8 str[EFI_VARIABLE_GUID_LEN + 1]; int i; if (table_types) { - efi_guid_unparse(guid, str); - for (i = 0; efi_guidcmp(table_types[i].guid, NULL_GUID); i++) { - efi_guid_unparse(&table_types[i].guid, str); - if (!efi_guidcmp(*guid, table_types[i].guid)) { *(table_types[i].ptr) = table; pr_cont(" %s=0x%lx ", @@ -403,8 +407,7 @@ static int __init fdt_find_uefi_params(unsigned long node, const char *uname, u64 val; int i, len; - if (depth != 1 || - (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) + if (depth != 1 || strcmp(uname, "chosen") != 0) return 0; for (i = 0; i < ARRAY_SIZE(dt_params); i++) { diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index f256ecd8a176..7b2e0496e0c0 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -39,7 +39,7 @@ * fix locking per Peter Chubb's findings * * 25 Mar 2002 - Matt Domsch <Matt_Domsch@dell.com> - * move uuid_unparse() to include/asm-ia64/efi.h:efi_guid_unparse() + * move uuid_unparse() to include/asm-ia64/efi.h:efi_guid_to_str() * * 12 Feb 2002 - Matt Domsch <Matt_Domsch@dell.com> * use list_for_each_safe when deleting vars. @@ -128,7 +128,7 @@ efivar_guid_read(struct efivar_entry *entry, char *buf) if (!entry || !buf) return 0; - efi_guid_unparse(&var->VendorGuid, str); + efi_guid_to_str(&var->VendorGuid, str); str += strlen(str); str += sprintf(str, "\n"); @@ -569,7 +569,7 @@ efivar_create_sysfs_entry(struct efivar_entry *new_var) private variables from another's. */ *(short_name + strlen(short_name)) = '-'; - efi_guid_unparse(&new_var->var.VendorGuid, + efi_guid_to_str(&new_var->var.VendorGuid, short_name + strlen(short_name)); new_var->kobj.kset = efivars_kset; diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index b14bc2b9fb4d..8902f52e0998 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -24,3 +24,17 @@ lib-y := efi-stub-helper.o lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o CFLAGS_fdt.o += -I$(srctree)/scripts/dtc/libfdt/ + +# +# arm64 puts the stub in the kernel proper, which will unnecessarily retain all +# code indefinitely unless it is annotated as __init/__initdata/__initconst etc. +# So let's apply the __init annotations at the section level, by prefixing +# the section names directly. This will ensure that even all the inline string +# literals are covered. +# +extra-$(CONFIG_ARM64) := $(lib-y) +lib-$(CONFIG_ARM64) := $(patsubst %.o,%.init.o,$(lib-y)) + +OBJCOPYFLAGS := --prefix-alloc-sections=.init +$(obj)/%.init.o: $(obj)/%.o FORCE + $(call if_changed,objcopy) diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index eb48a1a1a576..2b3814702dcf 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -17,10 +17,10 @@ #include "efistub.h" -static int __init efi_secureboot_enabled(efi_system_table_t *sys_table_arg) +static int efi_secureboot_enabled(efi_system_table_t *sys_table_arg) { - static efi_guid_t const var_guid __initconst = EFI_GLOBAL_VARIABLE_GUID; - static efi_char16_t const var_name[] __initconst = { + static efi_guid_t const var_guid = EFI_GLOBAL_VARIABLE_GUID; + static efi_char16_t const var_name[] = { 'S', 'e', 'c', 'u', 'r', 'e', 'B', 'o', 'o', 't', 0 }; efi_get_variable_t *f_getvar = sys_table_arg->runtime->get_variable; @@ -164,7 +164,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table, * for both archictectures, with the arch-specific code provided in the * handle_kernel_image() function. */ -unsigned long __init efi_entry(void *handle, efi_system_table_t *sys_table, +unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, unsigned long *image_addr) { efi_loaded_image_t *image; diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index a920fec8fe88..9bd9fbb5bea8 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -101,7 +101,7 @@ fail: } -unsigned long __init get_dram_base(efi_system_table_t *sys_table_arg) +unsigned long get_dram_base(efi_system_table_t *sys_table_arg) { efi_status_t status; unsigned long map_size; diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c index 018c29a26615..87b8e3b900d2 100644 --- a/drivers/firmware/efi/runtime-map.c +++ b/drivers/firmware/efi/runtime-map.c @@ -191,7 +191,7 @@ int __init efi_runtime_map_init(struct kobject *efi_kobj) return 0; out_add_entry: - for (j = i - 1; j > 0; j--) { + for (j = i - 1; j >= 0; j--) { entry = *(map_entries + j); kobject_put(&entry->kobj); } diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index a7de26d1ac80..d931cbbed240 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1389,6 +1389,7 @@ config SENSORS_ADS1015 config SENSORS_ADS7828 tristate "Texas Instruments ADS7828 and compatibles" depends on I2C + select REGMAP_I2C help If you say yes here you get support for Texas Instruments ADS7828 and ADS7830 8-channel A/D converters. ADS7828 resolution is 12-bit, while @@ -1430,8 +1431,8 @@ config SENSORS_INA2XX tristate "Texas Instruments INA219 and compatibles" depends on I2C help - If you say yes here you get support for INA219, INA220, INA226, and - INA230 power monitor chips. + If you say yes here you get support for INA219, INA220, INA226, + INA230, and INA231 power monitor chips. The INA2xx driver is configured for the default configuration of the part as described in the datasheet. diff --git a/drivers/hwmon/abx500.c b/drivers/hwmon/abx500.c index 13875968c844..6cb89c0ebab6 100644 --- a/drivers/hwmon/abx500.c +++ b/drivers/hwmon/abx500.c @@ -221,7 +221,7 @@ static ssize_t show_min(struct device *dev, struct abx500_temp *data = dev_get_drvdata(dev); struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - return sprintf(buf, "%ld\n", data->min[attr->index]); + return sprintf(buf, "%lu\n", data->min[attr->index]); } static ssize_t show_max(struct device *dev, @@ -230,7 +230,7 @@ static ssize_t show_max(struct device *dev, struct abx500_temp *data = dev_get_drvdata(dev); struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - return sprintf(buf, "%ld\n", data->max[attr->index]); + return sprintf(buf, "%lu\n", data->max[attr->index]); } static ssize_t show_max_hyst(struct device *dev, @@ -239,7 +239,7 @@ static ssize_t show_max_hyst(struct device *dev, struct abx500_temp *data = dev_get_drvdata(dev); struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - return sprintf(buf, "%ld\n", data->max_hyst[attr->index]); + return sprintf(buf, "%lu\n", data->max_hyst[attr->index]); } static ssize_t show_min_alarm(struct device *dev, diff --git a/drivers/hwmon/ad7314.c b/drivers/hwmon/ad7314.c index f4f9b219bf16..11955467fc0f 100644 --- a/drivers/hwmon/ad7314.c +++ b/drivers/hwmon/ad7314.c @@ -16,6 +16,7 @@ #include <linux/err.h> #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> +#include <linux/bitops.h> /* * AD7314 temperature masks @@ -67,7 +68,7 @@ static ssize_t ad7314_show_temperature(struct device *dev, switch (spi_get_device_id(chip->spi_dev)->driver_data) { case ad7314: data = (ret & AD7314_TEMP_MASK) >> AD7314_TEMP_SHIFT; - data = (data << 6) >> 6; + data = sign_extend32(data, 9); return sprintf(buf, "%d\n", 250 * data); case adt7301: @@ -78,7 +79,7 @@ static ssize_t ad7314_show_temperature(struct device *dev, * register. 1lsb - 31.25 milli degrees centigrade */ data = ret & ADT7301_TEMP_MASK; - data = (data << 2) >> 2; + data = sign_extend32(data, 13); return sprintf(buf, "%d\n", DIV_ROUND_CLOSEST(data * 3125, 100)); diff --git a/drivers/hwmon/adc128d818.c b/drivers/hwmon/adc128d818.c index 0625e50d7a6e..ad2b47e40345 100644 --- a/drivers/hwmon/adc128d818.c +++ b/drivers/hwmon/adc128d818.c @@ -27,6 +27,7 @@ #include <linux/err.h> #include <linux/regulator/consumer.h> #include <linux/mutex.h> +#include <linux/bitops.h> /* Addresses to scan * The chip also supports addresses 0x35..0x37. Don't scan those addresses @@ -189,7 +190,7 @@ static ssize_t adc128_show_temp(struct device *dev, if (IS_ERR(data)) return PTR_ERR(data); - temp = (data->temp[index] << 7) >> 7; /* sign extend */ + temp = sign_extend32(data->temp[index], 8); return sprintf(buf, "%d\n", temp * 500);/* 0.5 degrees C resolution */ } diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c index a622d40eec17..bce4e9ff21bf 100644 --- a/drivers/hwmon/ads7828.c +++ b/drivers/hwmon/ads7828.c @@ -30,14 +30,12 @@ #include <linux/hwmon-sysfs.h> #include <linux/i2c.h> #include <linux/init.h> -#include <linux/jiffies.h> #include <linux/module.h> -#include <linux/mutex.h> #include <linux/platform_data/ads7828.h> +#include <linux/regmap.h> #include <linux/slab.h> /* The ADS7828 registers */ -#define ADS7828_NCH 8 /* 8 channels supported */ #define ADS7828_CMD_SD_SE 0x80 /* Single ended inputs */ #define ADS7828_CMD_PD1 0x04 /* Internal vref OFF && A/D ON */ #define ADS7828_CMD_PD3 0x0C /* Internal vref ON && A/D ON */ @@ -50,17 +48,9 @@ enum ads7828_chips { ads7828, ads7830 }; /* Client specific data */ struct ads7828_data { - struct i2c_client *client; - struct mutex update_lock; /* Mutex protecting updates */ - unsigned long last_updated; /* Last updated time (in jiffies) */ - u16 adc_input[ADS7828_NCH]; /* ADS7828_NCH samples */ - bool valid; /* Validity flag */ - bool diff_input; /* Differential input */ - bool ext_vref; /* External voltage reference */ - unsigned int vref_mv; /* voltage reference value */ + struct regmap *regmap; u8 cmd_byte; /* Command byte without channel bits */ unsigned int lsb_resol; /* Resolution of the ADC sample LSB */ - s32 (*read_channel)(const struct i2c_client *client, u8 command); }; /* Command byte C2,C1,C0 - see datasheet */ @@ -69,42 +59,22 @@ static inline u8 ads7828_cmd_byte(u8 cmd, int ch) return cmd | (((ch >> 1) | (ch & 0x01) << 2) << 4); } -/* Update data for the device (all 8 channels) */ -static struct ads7828_data *ads7828_update_device(struct device *dev) -{ - struct ads7828_data *data = dev_get_drvdata(dev); - struct i2c_client *client = data->client; - - mutex_lock(&data->update_lock); - - if (time_after(jiffies, data->last_updated + HZ + HZ / 2) - || !data->valid) { - unsigned int ch; - dev_dbg(&client->dev, "Starting ads7828 update\n"); - - for (ch = 0; ch < ADS7828_NCH; ch++) { - u8 cmd = ads7828_cmd_byte(data->cmd_byte, ch); - data->adc_input[ch] = data->read_channel(client, cmd); - } - data->last_updated = jiffies; - data->valid = true; - } - - mutex_unlock(&data->update_lock); - - return data; -} - /* sysfs callback function */ static ssize_t ads7828_show_in(struct device *dev, struct device_attribute *da, char *buf) { struct sensor_device_attribute *attr = to_sensor_dev_attr(da); - struct ads7828_data *data = ads7828_update_device(dev); - unsigned int value = DIV_ROUND_CLOSEST(data->adc_input[attr->index] * - data->lsb_resol, 1000); + struct ads7828_data *data = dev_get_drvdata(dev); + u8 cmd = ads7828_cmd_byte(data->cmd_byte, attr->index); + unsigned int regval; + int err; - return sprintf(buf, "%d\n", value); + err = regmap_read(data->regmap, cmd, ®val); + if (err < 0) + return err; + + return sprintf(buf, "%d\n", + DIV_ROUND_CLOSEST(regval * data->lsb_resol, 1000)); } static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, ads7828_show_in, NULL, 0); @@ -130,6 +100,16 @@ static struct attribute *ads7828_attrs[] = { ATTRIBUTE_GROUPS(ads7828); +static const struct regmap_config ads2828_regmap_config = { + .reg_bits = 8, + .val_bits = 16, +}; + +static const struct regmap_config ads2830_regmap_config = { + .reg_bits = 8, + .val_bits = 8, +}; + static int ads7828_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -137,42 +117,40 @@ static int ads7828_probe(struct i2c_client *client, struct ads7828_platform_data *pdata = dev_get_platdata(dev); struct ads7828_data *data; struct device *hwmon_dev; + unsigned int vref_mv = ADS7828_INT_VREF_MV; + bool diff_input = false; + bool ext_vref = false; data = devm_kzalloc(dev, sizeof(struct ads7828_data), GFP_KERNEL); if (!data) return -ENOMEM; if (pdata) { - data->diff_input = pdata->diff_input; - data->ext_vref = pdata->ext_vref; - if (data->ext_vref) - data->vref_mv = pdata->vref_mv; + diff_input = pdata->diff_input; + ext_vref = pdata->ext_vref; + if (ext_vref && pdata->vref_mv) + vref_mv = pdata->vref_mv; } - /* Bound Vref with min/max values if it was provided */ - if (data->vref_mv) - data->vref_mv = clamp_val(data->vref_mv, - ADS7828_EXT_VREF_MV_MIN, - ADS7828_EXT_VREF_MV_MAX); - else - data->vref_mv = ADS7828_INT_VREF_MV; + /* Bound Vref with min/max values */ + vref_mv = clamp_val(vref_mv, ADS7828_EXT_VREF_MV_MIN, + ADS7828_EXT_VREF_MV_MAX); /* ADS7828 uses 12-bit samples, while ADS7830 is 8-bit */ if (id->driver_data == ads7828) { - data->lsb_resol = DIV_ROUND_CLOSEST(data->vref_mv * 1000, 4096); - data->read_channel = i2c_smbus_read_word_swapped; + data->lsb_resol = DIV_ROUND_CLOSEST(vref_mv * 1000, 4096); + data->regmap = devm_regmap_init_i2c(client, + &ads2828_regmap_config); } else { - data->lsb_resol = DIV_ROUND_CLOSEST(data->vref_mv * 1000, 256); - data->read_channel = i2c_smbus_read_byte_data; + data->lsb_resol = DIV_ROUND_CLOSEST(vref_mv * 1000, 256); + data->regmap = devm_regmap_init_i2c(client, + &ads2830_regmap_config); } - data->cmd_byte = data->ext_vref ? ADS7828_CMD_PD1 : ADS7828_CMD_PD3; - if (!data->diff_input) + data->cmd_byte = ext_vref ? ADS7828_CMD_PD1 : ADS7828_CMD_PD3; + if (!diff_input) data->cmd_byte |= ADS7828_CMD_SD_SE; - data->client = client; - mutex_init(&data->update_lock); - hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, data, ads7828_groups); diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index e01feba909c3..d1542b7d4bc3 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -35,6 +35,7 @@ #include <linux/hwmon-sysfs.h> #include <linux/jiffies.h> #include <linux/of.h> +#include <linux/delay.h> #include <linux/platform_data/ina2xx.h> @@ -51,7 +52,6 @@ #define INA226_ALERT_LIMIT 0x07 #define INA226_DIE_ID 0xFF - /* register count */ #define INA219_REGISTERS 6 #define INA226_REGISTERS 8 @@ -64,6 +64,24 @@ /* worst case is 68.10 ms (~14.6Hz, ina219) */ #define INA2XX_CONVERSION_RATE 15 +#define INA2XX_MAX_DELAY 69 /* worst case delay in ms */ + +#define INA2XX_RSHUNT_DEFAULT 10000 + +/* bit mask for reading the averaging setting in the configuration register */ +#define INA226_AVG_RD_MASK 0x0E00 + +#define INA226_READ_AVG(reg) (((reg) & INA226_AVG_RD_MASK) >> 9) +#define INA226_SHIFT_AVG(val) ((val) << 9) + +/* common attrs, ina226 attrs and NULL */ +#define INA2XX_MAX_ATTRIBUTE_GROUPS 3 + +/* + * Both bus voltage and shunt voltage conversion times for ina226 are set + * to 0b0100 on POR, which translates to 2200 microseconds in total. + */ +#define INA226_TOTAL_CONV_TIME_DEFAULT 2200 enum ina2xx_ids { ina219, ina226 }; @@ -81,11 +99,16 @@ struct ina2xx_data { struct i2c_client *client; const struct ina2xx_config *config; + long rshunt; + u16 curr_config; + struct mutex update_lock; bool valid; unsigned long last_updated; + int update_interval; /* in jiffies */ int kind; + const struct attribute_group *groups[INA2XX_MAX_ATTRIBUTE_GROUPS]; u16 regs[INA2XX_MAX_REGISTERS]; }; @@ -110,34 +133,156 @@ static const struct ina2xx_config ina2xx_config[] = { }, }; -static struct ina2xx_data *ina2xx_update_device(struct device *dev) +/* + * Available averaging rates for ina226. The indices correspond with + * the bit values expected by the chip (according to the ina226 datasheet, + * table 3 AVG bit settings, found at + * http://www.ti.com/lit/ds/symlink/ina226.pdf. + */ +static const int ina226_avg_tab[] = { 1, 4, 16, 64, 128, 256, 512, 1024 }; + +static int ina226_avg_bits(int avg) +{ + int i; + + /* Get the closest average from the tab. */ + for (i = 0; i < ARRAY_SIZE(ina226_avg_tab) - 1; i++) { + if (avg <= (ina226_avg_tab[i] + ina226_avg_tab[i + 1]) / 2) + break; + } + + return i; /* Return 0b0111 for values greater than 1024. */ +} + +static int ina226_reg_to_interval(u16 config) +{ + int avg = ina226_avg_tab[INA226_READ_AVG(config)]; + + /* + * Multiply the total conversion time by the number of averages. + * Return the result in milliseconds. + */ + return DIV_ROUND_CLOSEST(avg * INA226_TOTAL_CONV_TIME_DEFAULT, 1000); +} + +static u16 ina226_interval_to_reg(int interval, u16 config) +{ + int avg, avg_bits; + + avg = DIV_ROUND_CLOSEST(interval * 1000, + INA226_TOTAL_CONV_TIME_DEFAULT); + avg_bits = ina226_avg_bits(avg); + + return (config & ~INA226_AVG_RD_MASK) | INA226_SHIFT_AVG(avg_bits); +} + +static void ina226_set_update_interval(struct ina2xx_data *data) +{ + int ms; + + ms = ina226_reg_to_interval(data->curr_config); + data->update_interval = msecs_to_jiffies(ms); +} + +static int ina2xx_calibrate(struct ina2xx_data *data) +{ + u16 val = DIV_ROUND_CLOSEST(data->config->calibration_factor, + data->rshunt); + + return i2c_smbus_write_word_swapped(data->client, + INA2XX_CALIBRATION, val); +} + +/* + * Initialize the configuration and calibration registers. + */ +static int ina2xx_init(struct ina2xx_data *data) { - struct ina2xx_data *data = dev_get_drvdata(dev); struct i2c_client *client = data->client; - struct ina2xx_data *ret = data; + int ret; - mutex_lock(&data->update_lock); + /* device configuration */ + ret = i2c_smbus_write_word_swapped(client, INA2XX_CONFIG, + data->curr_config); + if (ret < 0) + return ret; - if (time_after(jiffies, data->last_updated + - HZ / INA2XX_CONVERSION_RATE) || !data->valid) { + /* + * Set current LSB to 1mA, shunt is in uOhms + * (equation 13 in datasheet). + */ + return ina2xx_calibrate(data); +} - int i; +static int ina2xx_do_update(struct device *dev) +{ + struct ina2xx_data *data = dev_get_drvdata(dev); + struct i2c_client *client = data->client; + int i, rv, retry; - dev_dbg(&client->dev, "Starting ina2xx update\n"); + dev_dbg(&client->dev, "Starting ina2xx update\n"); + for (retry = 5; retry; retry--) { /* Read all registers */ for (i = 0; i < data->config->registers; i++) { - int rv = i2c_smbus_read_word_swapped(client, i); - if (rv < 0) { - ret = ERR_PTR(rv); - goto abort; - } + rv = i2c_smbus_read_word_swapped(client, i); + if (rv < 0) + return rv; data->regs[i] = rv; } + + /* + * If the current value in the calibration register is 0, the + * power and current registers will also remain at 0. In case + * the chip has been reset let's check the calibration + * register and reinitialize if needed. + */ + if (data->regs[INA2XX_CALIBRATION] == 0) { + dev_warn(dev, "chip not calibrated, reinitializing\n"); + + rv = ina2xx_init(data); + if (rv < 0) + return rv; + + /* + * Let's make sure the power and current registers + * have been updated before trying again. + */ + msleep(INA2XX_MAX_DELAY); + continue; + } + data->last_updated = jiffies; data->valid = 1; + + return 0; } -abort: + + /* + * If we're here then although all write operations succeeded, the + * chip still returns 0 in the calibration register. Nothing more we + * can do here. + */ + dev_err(dev, "unable to reinitialize the chip\n"); + return -ENODEV; +} + +static struct ina2xx_data *ina2xx_update_device(struct device *dev) +{ + struct ina2xx_data *data = dev_get_drvdata(dev); + struct ina2xx_data *ret = data; + unsigned long after; + int rv; + + mutex_lock(&data->update_lock); + + after = data->last_updated + data->update_interval; + if (time_after(jiffies, after) || !data->valid) { + rv = ina2xx_do_update(dev); + if (rv < 0) + ret = ERR_PTR(rv); + } + mutex_unlock(&data->update_lock); return ret; } @@ -164,6 +309,10 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg) /* signed register, LSB=1mA (selected), in mA */ val = (s16)data->regs[reg]; break; + case INA2XX_CALIBRATION: + val = DIV_ROUND_CLOSEST(data->config->calibration_factor, + data->regs[reg]); + break; default: /* programmer goofed */ WARN_ON_ONCE(1); @@ -187,6 +336,85 @@ static ssize_t ina2xx_show_value(struct device *dev, ina2xx_get_value(data, attr->index)); } +static ssize_t ina2xx_set_shunt(struct device *dev, + struct device_attribute *da, + const char *buf, size_t count) +{ + struct ina2xx_data *data = ina2xx_update_device(dev); + unsigned long val; + int status; + + if (IS_ERR(data)) + return PTR_ERR(data); + + status = kstrtoul(buf, 10, &val); + if (status < 0) + return status; + + if (val == 0 || + /* Values greater than the calibration factor make no sense. */ + val > data->config->calibration_factor) + return -EINVAL; + + mutex_lock(&data->update_lock); + data->rshunt = val; + status = ina2xx_calibrate(data); + mutex_unlock(&data->update_lock); + if (status < 0) + return status; + + return count; +} + +static ssize_t ina226_set_interval(struct device *dev, + struct device_attribute *da, + const char *buf, size_t count) +{ + struct ina2xx_data *data = dev_get_drvdata(dev); + unsigned long val; + int status; + + status = kstrtoul(buf, 10, &val); + if (status < 0) + return status; + + if (val > INT_MAX || val == 0) + return -EINVAL; + + mutex_lock(&data->update_lock); + data->curr_config = ina226_interval_to_reg(val, + data->regs[INA2XX_CONFIG]); + status = i2c_smbus_write_word_swapped(data->client, + INA2XX_CONFIG, + data->curr_config); + + ina226_set_update_interval(data); + /* Make sure the next access re-reads all registers. */ + data->valid = 0; + mutex_unlock(&data->update_lock); + if (status < 0) + return status; + + return count; +} + +static ssize_t ina226_show_interval(struct device *dev, + struct device_attribute *da, char *buf) +{ + struct ina2xx_data *data = ina2xx_update_device(dev); + + if (IS_ERR(data)) + return PTR_ERR(data); + + /* + * We don't use data->update_interval here as we want to display + * the actual interval used by the chip and jiffies_to_msecs() + * doesn't seem to be accurate enough. + */ + return snprintf(buf, PAGE_SIZE, "%d\n", + ina226_reg_to_interval(data->regs[INA2XX_CONFIG])); +} + /* shunt voltage */ static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, ina2xx_show_value, NULL, INA2XX_SHUNT_VOLTAGE); @@ -203,15 +431,37 @@ static SENSOR_DEVICE_ATTR(curr1_input, S_IRUGO, ina2xx_show_value, NULL, static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, ina2xx_show_value, NULL, INA2XX_POWER); +/* shunt resistance */ +static SENSOR_DEVICE_ATTR(shunt_resistor, S_IRUGO | S_IWUSR, + ina2xx_show_value, ina2xx_set_shunt, + INA2XX_CALIBRATION); + +/* update interval (ina226 only) */ +static SENSOR_DEVICE_ATTR(update_interval, S_IRUGO | S_IWUSR, + ina226_show_interval, ina226_set_interval, 0); + /* pointers to created device attributes */ static struct attribute *ina2xx_attrs[] = { &sensor_dev_attr_in0_input.dev_attr.attr, &sensor_dev_attr_in1_input.dev_attr.attr, &sensor_dev_attr_curr1_input.dev_attr.attr, &sensor_dev_attr_power1_input.dev_attr.attr, + &sensor_dev_attr_shunt_resistor.dev_attr.attr, NULL, }; -ATTRIBUTE_GROUPS(ina2xx); + +static const struct attribute_group ina2xx_group = { + .attrs = ina2xx_attrs, +}; + +static struct attribute *ina226_attrs[] = { + &sensor_dev_attr_update_interval.dev_attr.attr, + NULL, +}; + +static const struct attribute_group ina226_group = { + .attrs = ina226_attrs, +}; static int ina2xx_probe(struct i2c_client *client, const struct i2c_device_id *id) @@ -221,9 +471,8 @@ static int ina2xx_probe(struct i2c_client *client, struct device *dev = &client->dev; struct ina2xx_data *data; struct device *hwmon_dev; - long shunt = 10000; /* default shunt value 10mOhms */ u32 val; - int ret; + int ret, group = 0; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WORD_DATA)) return -ENODEV; @@ -234,50 +483,52 @@ static int ina2xx_probe(struct i2c_client *client, if (dev_get_platdata(dev)) { pdata = dev_get_platdata(dev); - shunt = pdata->shunt_uohms; + data->rshunt = pdata->shunt_uohms; } else if (!of_property_read_u32(dev->of_node, "shunt-resistor", &val)) { - shunt = val; + data->rshunt = val; + } else { + data->rshunt = INA2XX_RSHUNT_DEFAULT; } - if (shunt <= 0) - return -ENODEV; - /* set the device type */ data->kind = id->driver_data; data->config = &ina2xx_config[data->kind]; - - /* device configuration */ - ret = i2c_smbus_write_word_swapped(client, INA2XX_CONFIG, - data->config->config_default); - if (ret < 0) { - dev_err(dev, - "error writing to the config register: %d", ret); - return -ENODEV; - } + data->curr_config = data->config->config_default; + data->client = client; /* - * Set current LSB to 1mA, shunt is in uOhms - * (equation 13 in datasheet). + * Ina226 has a variable update_interval. For ina219 we + * use a constant value. */ - ret = i2c_smbus_write_word_swapped(client, INA2XX_CALIBRATION, - data->config->calibration_factor / shunt); + if (data->kind == ina226) + ina226_set_update_interval(data); + else + data->update_interval = HZ / INA2XX_CONVERSION_RATE; + + if (data->rshunt <= 0 || + data->rshunt > data->config->calibration_factor) + return -ENODEV; + + ret = ina2xx_init(data); if (ret < 0) { - dev_err(dev, - "error writing to the calibration register: %d", ret); + dev_err(dev, "error configuring the device: %d\n", ret); return -ENODEV; } - data->client = client; mutex_init(&data->update_lock); + data->groups[group++] = &ina2xx_group; + if (data->kind == ina226) + data->groups[group++] = &ina226_group; + hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, - data, ina2xx_groups); + data, data->groups); if (IS_ERR(hwmon_dev)) return PTR_ERR(hwmon_dev); dev_info(dev, "power monitor %s (Rshunt = %li uOhm)\n", - id->name, shunt); + id->name, data->rshunt); return 0; } @@ -287,6 +538,7 @@ static const struct i2c_device_id ina2xx_id[] = { { "ina220", ina219 }, { "ina226", ina226 }, { "ina230", ina226 }, + { "ina231", ina226 }, { } }; MODULE_DEVICE_TABLE(i2c, ina2xx_id); diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c index 388f8bcd898e..996bdfd5cf25 100644 --- a/drivers/hwmon/jc42.c +++ b/drivers/hwmon/jc42.c @@ -201,7 +201,7 @@ struct jc42_data { #define JC42_TEMP_MIN 0 #define JC42_TEMP_MAX 125000 -static u16 jc42_temp_to_reg(int temp, bool extended) +static u16 jc42_temp_to_reg(long temp, bool extended) { int ntemp = clamp_val(temp, extended ? JC42_TEMP_MIN_EXTENDED : @@ -213,11 +213,7 @@ static u16 jc42_temp_to_reg(int temp, bool extended) static int jc42_temp_from_reg(s16 reg) { - reg &= 0x1fff; - - /* sign extend register */ - if (reg & 0x1000) - reg |= 0xf000; + reg = sign_extend32(reg, 12); /* convert from 0.0625 to 0.001 resolution */ return reg * 125 / 2; @@ -308,15 +304,18 @@ static ssize_t set_temp_crit_hyst(struct device *dev, const char *buf, size_t count) { struct jc42_data *data = dev_get_drvdata(dev); - unsigned long val; + long val; int diff, hyst; int err; int ret = count; - if (kstrtoul(buf, 10, &val) < 0) + if (kstrtol(buf, 10, &val) < 0) return -EINVAL; + val = clamp_val(val, (data->extended ? JC42_TEMP_MIN_EXTENDED : + JC42_TEMP_MIN) - 6000, JC42_TEMP_MAX); diff = jc42_temp_from_reg(data->temp[t_crit]) - val; + hyst = 0; if (diff > 0) { if (diff < 2250) diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index ec5678289e4a..55765790907b 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -779,7 +779,7 @@ static bool nct7802_regmap_is_volatile(struct device *dev, unsigned int reg) return reg != REG_BANK && reg <= 0x20; } -static struct regmap_config nct7802_regmap_config = { +static const struct regmap_config nct7802_regmap_config = { .reg_bits = 8, .val_bits = 8, .cache_type = REGCACHE_RBTREE, diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c index ba9f478f64ee..9da2735f1424 100644 --- a/drivers/hwmon/tmp102.c +++ b/drivers/hwmon/tmp102.c @@ -253,7 +253,7 @@ static int tmp102_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int tmp102_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); @@ -279,17 +279,10 @@ static int tmp102_resume(struct device *dev) config &= ~TMP102_CONF_SD; return i2c_smbus_write_word_swapped(client, TMP102_CONF_REG, config); } - -static const struct dev_pm_ops tmp102_dev_pm_ops = { - .suspend = tmp102_suspend, - .resume = tmp102_resume, -}; - -#define TMP102_DEV_PM_OPS (&tmp102_dev_pm_ops) -#else -#define TMP102_DEV_PM_OPS NULL #endif /* CONFIG_PM */ +static SIMPLE_DEV_PM_OPS(tmp102_dev_pm_ops, tmp102_suspend, tmp102_resume); + static const struct i2c_device_id tmp102_id[] = { { "tmp102", 0 }, { } @@ -298,7 +291,7 @@ MODULE_DEVICE_TABLE(i2c, tmp102_id); static struct i2c_driver tmp102_driver = { .driver.name = DRIVER_NAME, - .driver.pm = TMP102_DEV_PM_OPS, + .driver.pm = &tmp102_dev_pm_ops, .probe = tmp102_probe, .remove = tmp102_remove, .id_table = tmp102_id, diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 98024856df07..59de6364a910 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4284,7 +4284,6 @@ static int alloc_hpet_msi(unsigned int irq, unsigned int id) } struct irq_remap_ops amd_iommu_irq_ops = { - .supported = amd_iommu_supported, .prepare = amd_iommu_prepare, .enable = amd_iommu_enable, .disable = amd_iommu_disable, diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index b0522f15730f..9a20248e7068 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -2014,9 +2014,6 @@ static bool detect_ivrs(void) /* Make sure ACS will be enabled during PCI probe */ pci_request_acs(); - if (!disable_irq_remap) - amd_iommu_irq_remap = true; - return true; } @@ -2123,12 +2120,14 @@ static int __init iommu_go_to_state(enum iommu_init_state state) #ifdef CONFIG_IRQ_REMAP int __init amd_iommu_prepare(void) { - return iommu_go_to_state(IOMMU_ACPI_FINISHED); -} + int ret; -int __init amd_iommu_supported(void) -{ - return amd_iommu_irq_remap ? 1 : 0; + amd_iommu_irq_remap = true; + + ret = iommu_go_to_state(IOMMU_ACPI_FINISHED); + if (ret) + return ret; + return amd_iommu_irq_remap ? 0 : -ENODEV; } int __init amd_iommu_enable(void) diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 95ed6deae47f..861af9d8338a 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -33,7 +33,6 @@ extern void amd_iommu_init_notifier(void); extern void amd_iommu_init_api(void); /* Needed for interrupt remapping */ -extern int amd_iommu_supported(void); extern int amd_iommu_prepare(void); extern int amd_iommu_enable(void); extern void amd_iommu_disable(void); diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index a55b207b9425..14de1ab223c8 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -32,8 +32,9 @@ struct hpet_scope { }; #define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) -#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8) +#define IRTE_DEST(dest) ((eim_mode) ? dest : dest << 8) +static int __read_mostly eim_mode; static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static struct hpet_scope ir_hpet[MAX_HPET_TBS]; @@ -481,11 +482,11 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) if (iommu->ir_table) return 0; - ir_table = kzalloc(sizeof(struct ir_table), GFP_ATOMIC); + ir_table = kzalloc(sizeof(struct ir_table), GFP_KERNEL); if (!ir_table) return -ENOMEM; - pages = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, + pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER); if (!pages) { @@ -566,13 +567,27 @@ static int __init dmar_x2apic_optout(void) return dmar->flags & DMAR_X2APIC_OPT_OUT; } -static int __init intel_irq_remapping_supported(void) +static void __init intel_cleanup_irq_remapping(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + + for_each_iommu(iommu, drhd) { + if (ecap_ir_support(iommu->ecap)) { + iommu_disable_irq_remapping(iommu); + intel_teardown_irq_remapping(iommu); + } + } + + if (x2apic_supported()) + pr_warn("Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n"); +} + +static int __init intel_prepare_irq_remapping(void) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; - if (disable_irq_remap) - return 0; if (irq_remap_broken) { printk(KERN_WARNING "This system BIOS has enabled interrupt remapping\n" @@ -581,38 +596,45 @@ static int __init intel_irq_remapping_supported(void) "interrupt remapping is being disabled. Please\n" "contact your BIOS vendor for an update\n"); add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); - disable_irq_remap = 1; - return 0; + return -ENODEV; } + if (dmar_table_init() < 0) + return -ENODEV; + if (!dmar_ir_support()) - return 0; + return -ENODEV; + + if (parse_ioapics_under_ir() != 1) { + printk(KERN_INFO "Not enabling interrupt remapping\n"); + goto error; + } + /* First make sure all IOMMUs support IRQ remapping */ for_each_iommu(iommu, drhd) if (!ecap_ir_support(iommu->ecap)) - return 0; + goto error; - return 1; + /* Do the allocations early */ + for_each_iommu(iommu, drhd) + if (intel_setup_irq_remapping(iommu)) + goto error; + + return 0; + +error: + intel_cleanup_irq_remapping(); + return -ENODEV; } static int __init intel_enable_irq_remapping(void) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; - bool x2apic_present; int setup = 0; int eim = 0; - x2apic_present = x2apic_supported(); - - if (parse_ioapics_under_ir() != 1) { - printk(KERN_INFO "Not enable interrupt remapping\n"); - goto error; - } - - if (x2apic_present) { - pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); - + if (x2apic_supported()) { eim = !dmar_x2apic_optout(); if (!eim) printk(KERN_WARNING @@ -646,16 +668,15 @@ static int __init intel_enable_irq_remapping(void) /* * check for the Interrupt-remapping support */ - for_each_iommu(iommu, drhd) { - if (!ecap_ir_support(iommu->ecap)) - continue; - + for_each_iommu(iommu, drhd) if (eim && !ecap_eim_support(iommu->ecap)) { printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, " " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap); - goto error; + eim = 0; } - } + eim_mode = eim; + if (eim) + pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); /* * Enable queued invalidation for all the DRHD's. @@ -675,12 +696,6 @@ static int __init intel_enable_irq_remapping(void) * Setup Interrupt-remapping for all the DRHD's now. */ for_each_iommu(iommu, drhd) { - if (!ecap_ir_support(iommu->ecap)) - continue; - - if (intel_setup_irq_remapping(iommu)) - goto error; - iommu_set_irq_remapping(iommu, eim); setup = 1; } @@ -702,15 +717,7 @@ static int __init intel_enable_irq_remapping(void) return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE; error: - for_each_iommu(iommu, drhd) - if (ecap_ir_support(iommu->ecap)) { - iommu_disable_irq_remapping(iommu); - intel_teardown_irq_remapping(iommu); - } - - if (x2apic_present) - pr_warn("Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n"); - + intel_cleanup_irq_remapping(); return -1; } @@ -1199,8 +1206,7 @@ static int intel_alloc_hpet_msi(unsigned int irq, unsigned int id) } struct irq_remap_ops intel_irq_remap_ops = { - .supported = intel_irq_remapping_supported, - .prepare = dmar_table_init, + .prepare = intel_prepare_irq_remapping, .enable = intel_enable_irq_remapping, .disable = disable_irq_remapping, .reenable = reenable_irq_remapping, diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 89c4846683be..390079ee1350 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -17,12 +17,11 @@ #include "irq_remapping.h" int irq_remapping_enabled; - -int disable_irq_remap; int irq_remap_broken; int disable_sourceid_checking; int no_x2apic_optout; +static int disable_irq_remap; static struct irq_remap_ops *remap_ops; static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); @@ -194,45 +193,32 @@ static __init int setup_irqremap(char *str) } early_param("intremap", setup_irqremap); -void __init setup_irq_remapping_ops(void) -{ - remap_ops = &intel_irq_remap_ops; - -#ifdef CONFIG_AMD_IOMMU - if (amd_iommu_irq_ops.prepare() == 0) - remap_ops = &amd_iommu_irq_ops; -#endif -} - void set_irq_remapping_broken(void) { irq_remap_broken = 1; } -int irq_remapping_supported(void) +int __init irq_remapping_prepare(void) { if (disable_irq_remap) - return 0; - - if (!remap_ops || !remap_ops->supported) - return 0; - - return remap_ops->supported(); -} + return -ENOSYS; -int __init irq_remapping_prepare(void) -{ - if (!remap_ops || !remap_ops->prepare) - return -ENODEV; + if (intel_irq_remap_ops.prepare() == 0) + remap_ops = &intel_irq_remap_ops; + else if (IS_ENABLED(CONFIG_AMD_IOMMU) && + amd_iommu_irq_ops.prepare() == 0) + remap_ops = &amd_iommu_irq_ops; + else + return -ENOSYS; - return remap_ops->prepare(); + return 0; } int __init irq_remapping_enable(void) { int ret; - if (!remap_ops || !remap_ops->enable) + if (!remap_ops->enable) return -ENODEV; ret = remap_ops->enable(); @@ -245,22 +231,16 @@ int __init irq_remapping_enable(void) void irq_remapping_disable(void) { - if (!irq_remapping_enabled || - !remap_ops || - !remap_ops->disable) - return; - - remap_ops->disable(); + if (irq_remapping_enabled && remap_ops->disable) + remap_ops->disable(); } int irq_remapping_reenable(int mode) { - if (!irq_remapping_enabled || - !remap_ops || - !remap_ops->reenable) - return 0; + if (irq_remapping_enabled && remap_ops->reenable) + return remap_ops->reenable(mode); - return remap_ops->reenable(mode); + return 0; } int __init irq_remap_enable_fault_handling(void) @@ -268,7 +248,7 @@ int __init irq_remap_enable_fault_handling(void) if (!irq_remapping_enabled) return 0; - if (!remap_ops || !remap_ops->enable_faulting) + if (!remap_ops->enable_faulting) return -ENODEV; return remap_ops->enable_faulting(); @@ -279,7 +259,7 @@ int setup_ioapic_remapped_entry(int irq, unsigned int destination, int vector, struct io_apic_irq_attr *attr) { - if (!remap_ops || !remap_ops->setup_ioapic_entry) + if (!remap_ops->setup_ioapic_entry) return -ENODEV; return remap_ops->setup_ioapic_entry(irq, entry, destination, @@ -289,8 +269,7 @@ int setup_ioapic_remapped_entry(int irq, static int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - if (!config_enabled(CONFIG_SMP) || !remap_ops || - !remap_ops->set_affinity) + if (!config_enabled(CONFIG_SMP) || !remap_ops->set_affinity) return 0; return remap_ops->set_affinity(data, mask, force); @@ -300,10 +279,7 @@ void free_remapped_irq(int irq) { struct irq_cfg *cfg = irq_cfg(irq); - if (!remap_ops || !remap_ops->free_irq) - return; - - if (irq_remapped(cfg)) + if (irq_remapped(cfg) && remap_ops->free_irq) remap_ops->free_irq(irq); } @@ -315,13 +291,13 @@ void compose_remapped_msi_msg(struct pci_dev *pdev, if (!irq_remapped(cfg)) native_compose_msi_msg(pdev, irq, dest, msg, hpet_id); - else if (remap_ops && remap_ops->compose_msi_msg) + else if (remap_ops->compose_msi_msg) remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); } static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) { - if (!remap_ops || !remap_ops->msi_alloc_irq) + if (!remap_ops->msi_alloc_irq) return -ENODEV; return remap_ops->msi_alloc_irq(pdev, irq, nvec); @@ -330,7 +306,7 @@ static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, int index, int sub_handle) { - if (!remap_ops || !remap_ops->msi_setup_irq) + if (!remap_ops->msi_setup_irq) return -ENODEV; return remap_ops->msi_setup_irq(pdev, irq, index, sub_handle); @@ -340,7 +316,7 @@ int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) { int ret; - if (!remap_ops || !remap_ops->alloc_hpet_msi) + if (!remap_ops->alloc_hpet_msi) return -ENODEV; ret = remap_ops->alloc_hpet_msi(irq, id); diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index fde250f86e60..c448eb48340a 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -31,16 +31,12 @@ struct cpumask; struct pci_dev; struct msi_msg; -extern int disable_irq_remap; extern int irq_remap_broken; extern int disable_sourceid_checking; extern int no_x2apic_optout; extern int irq_remapping_enabled; struct irq_remap_ops { - /* Check whether Interrupt Remapping is supported */ - int (*supported)(void); - /* Initializes hardware and makes it ready for remapping interrupts */ int (*prepare)(void); @@ -89,7 +85,6 @@ extern struct irq_remap_ops amd_iommu_irq_ops; #else /* CONFIG_IRQ_REMAP */ #define irq_remapping_enabled 0 -#define disable_irq_remap 1 #define irq_remap_broken 0 #endif /* CONFIG_IRQ_REMAP */ diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 5bdedf6df153..c355a226a024 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -5,6 +5,7 @@ menuconfig MD bool "Multiple devices driver support (RAID and LVM)" depends on BLOCK + select SRCU help Support multiple physical spindles through a single logical device. Required for RAID and logical volume management. diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index d6607ee9c855..84673ebcf428 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -197,6 +197,7 @@ config NETCONSOLE_DYNAMIC config NETPOLL def_bool NETCONSOLE + select SRCU config NET_POLL_CONTROLLER def_bool NETPOLL diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index c3a60b57a865..a6f116aa5235 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -414,6 +414,14 @@ config REGULATOR_MAX77802 Exynos5420/Exynos5800 SoCs to control various voltages. It includes support for control of voltage and ramp speed. +config REGULATOR_MAX77843 + tristate "Maxim 77843 regulator" + depends on MFD_MAX77843 + help + This driver controls a Maxim 77843 regulator. + The regulator include two 'SAFEOUT' for USB(Universal Serial Bus) + This is suitable for Exynos5433 SoC chips. + config REGULATOR_MC13XXX_CORE tristate @@ -433,6 +441,15 @@ config REGULATOR_MC13892 Say y here to support the regulators found on the Freescale MC13892 PMIC. +config REGULATOR_MT6397 + tristate "MediaTek MT6397 PMIC" + depends on MFD_MT6397 + help + Say y here to select this option to enable the power regulator of + MediaTek MT6397 PMIC. + This driver supports the control of different power rails of device + through regulator interface. + config REGULATOR_PALMAS tristate "TI Palmas PMIC Regulators" depends on MFD_PALMAS diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 1f28ebfc6f3a..2c4da15e1545 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -55,9 +55,11 @@ obj-$(CONFIG_REGULATOR_MAX8998) += max8998.o obj-$(CONFIG_REGULATOR_MAX77686) += max77686.o obj-$(CONFIG_REGULATOR_MAX77693) += max77693.o obj-$(CONFIG_REGULATOR_MAX77802) += max77802.o +obj-$(CONFIG_REGULATOR_MAX77843) += max77843.o obj-$(CONFIG_REGULATOR_MC13783) += mc13783-regulator.o obj-$(CONFIG_REGULATOR_MC13892) += mc13892-regulator.o obj-$(CONFIG_REGULATOR_MC13XXX_CORE) += mc13xxx-regulator-core.o +obj-$(CONFIG_REGULATOR_MT6397) += mt6397-regulator.o obj-$(CONFIG_REGULATOR_QCOM_RPM) += qcom_rpm-regulator.o obj-$(CONFIG_REGULATOR_PALMAS) += palmas-regulator.o obj-$(CONFIG_REGULATOR_PFUZE100) += pfuze100-regulator.o diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index f23d7e1f2ee7..e4331f5e5d7d 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -32,11 +32,13 @@ #define AXP20X_FREQ_DCDC_MASK 0x0f -#define AXP20X_DESC_IO(_id, _supply, _min, _max, _step, _vreg, _vmask, _ereg, \ - _emask, _enable_val, _disable_val) \ +#define AXP20X_DESC_IO(_id, _match, _supply, _min, _max, _step, _vreg, _vmask, \ + _ereg, _emask, _enable_val, _disable_val) \ [AXP20X_##_id] = { \ .name = #_id, \ .supply_name = (_supply), \ + .of_match = of_match_ptr(_match), \ + .regulators_node = of_match_ptr("regulators"), \ .type = REGULATOR_VOLTAGE, \ .id = AXP20X_##_id, \ .n_voltages = (((_max) - (_min)) / (_step) + 1), \ @@ -52,11 +54,13 @@ .ops = &axp20x_ops, \ } -#define AXP20X_DESC(_id, _supply, _min, _max, _step, _vreg, _vmask, _ereg, \ - _emask) \ +#define AXP20X_DESC(_id, _match, _supply, _min, _max, _step, _vreg, _vmask, \ + _ereg, _emask) \ [AXP20X_##_id] = { \ .name = #_id, \ .supply_name = (_supply), \ + .of_match = of_match_ptr(_match), \ + .regulators_node = of_match_ptr("regulators"), \ .type = REGULATOR_VOLTAGE, \ .id = AXP20X_##_id, \ .n_voltages = (((_max) - (_min)) / (_step) + 1), \ @@ -70,10 +74,12 @@ .ops = &axp20x_ops, \ } -#define AXP20X_DESC_FIXED(_id, _supply, _volt) \ +#define AXP20X_DESC_FIXED(_id, _match, _supply, _volt) \ [AXP20X_##_id] = { \ .name = #_id, \ .supply_name = (_supply), \ + .of_match = of_match_ptr(_match), \ + .regulators_node = of_match_ptr("regulators"), \ .type = REGULATOR_VOLTAGE, \ .id = AXP20X_##_id, \ .n_voltages = 1, \ @@ -82,10 +88,13 @@ .ops = &axp20x_ops_fixed \ } -#define AXP20X_DESC_TABLE(_id, _supply, _table, _vreg, _vmask, _ereg, _emask) \ +#define AXP20X_DESC_TABLE(_id, _match, _supply, _table, _vreg, _vmask, _ereg, \ + _emask) \ [AXP20X_##_id] = { \ .name = #_id, \ .supply_name = (_supply), \ + .of_match = of_match_ptr(_match), \ + .regulators_node = of_match_ptr("regulators"), \ .type = REGULATOR_VOLTAGE, \ .id = AXP20X_##_id, \ .n_voltages = ARRAY_SIZE(_table), \ @@ -127,36 +136,20 @@ static struct regulator_ops axp20x_ops = { }; static const struct regulator_desc axp20x_regulators[] = { - AXP20X_DESC(DCDC2, "vin2", 700, 2275, 25, AXP20X_DCDC2_V_OUT, 0x3f, - AXP20X_PWR_OUT_CTRL, 0x10), - AXP20X_DESC(DCDC3, "vin3", 700, 3500, 25, AXP20X_DCDC3_V_OUT, 0x7f, - AXP20X_PWR_OUT_CTRL, 0x02), - AXP20X_DESC_FIXED(LDO1, "acin", 1300), - AXP20X_DESC(LDO2, "ldo24in", 1800, 3300, 100, AXP20X_LDO24_V_OUT, 0xf0, - AXP20X_PWR_OUT_CTRL, 0x04), - AXP20X_DESC(LDO3, "ldo3in", 700, 3500, 25, AXP20X_LDO3_V_OUT, 0x7f, - AXP20X_PWR_OUT_CTRL, 0x40), - AXP20X_DESC_TABLE(LDO4, "ldo24in", axp20x_ldo4_data, AXP20X_LDO24_V_OUT, 0x0f, - AXP20X_PWR_OUT_CTRL, 0x08), - AXP20X_DESC_IO(LDO5, "ldo5in", 1800, 3300, 100, AXP20X_LDO5_V_OUT, 0xf0, - AXP20X_GPIO0_CTRL, 0x07, AXP20X_IO_ENABLED, - AXP20X_IO_DISABLED), -}; - -#define AXP_MATCH(_name, _id) \ - [AXP20X_##_id] = { \ - .name = #_name, \ - .driver_data = (void *) &axp20x_regulators[AXP20X_##_id], \ - } - -static struct of_regulator_match axp20x_matches[] = { - AXP_MATCH(dcdc2, DCDC2), - AXP_MATCH(dcdc3, DCDC3), - AXP_MATCH(ldo1, LDO1), - AXP_MATCH(ldo2, LDO2), - AXP_MATCH(ldo3, LDO3), - AXP_MATCH(ldo4, LDO4), - AXP_MATCH(ldo5, LDO5), + AXP20X_DESC(DCDC2, "dcdc2", "vin2", 700, 2275, 25, AXP20X_DCDC2_V_OUT, + 0x3f, AXP20X_PWR_OUT_CTRL, 0x10), + AXP20X_DESC(DCDC3, "dcdc3", "vin3", 700, 3500, 25, AXP20X_DCDC3_V_OUT, + 0x7f, AXP20X_PWR_OUT_CTRL, 0x02), + AXP20X_DESC_FIXED(LDO1, "ldo1", "acin", 1300), + AXP20X_DESC(LDO2, "ldo2", "ldo24in", 1800, 3300, 100, + AXP20X_LDO24_V_OUT, 0xf0, AXP20X_PWR_OUT_CTRL, 0x04), + AXP20X_DESC(LDO3, "ldo3", "ldo3in", 700, 3500, 25, AXP20X_LDO3_V_OUT, + 0x7f, AXP20X_PWR_OUT_CTRL, 0x40), + AXP20X_DESC_TABLE(LDO4, "ldo4", "ldo24in", axp20x_ldo4_data, + AXP20X_LDO24_V_OUT, 0x0f, AXP20X_PWR_OUT_CTRL, 0x08), + AXP20X_DESC_IO(LDO5, "ldo5", "ldo5in", 1800, 3300, 100, + AXP20X_LDO5_V_OUT, 0xf0, AXP20X_GPIO0_CTRL, 0x07, + AXP20X_IO_ENABLED, AXP20X_IO_DISABLED), }; static int axp20x_set_dcdc_freq(struct platform_device *pdev, u32 dcdcfreq) @@ -193,13 +186,6 @@ static int axp20x_regulator_parse_dt(struct platform_device *pdev) if (!regulators) { dev_warn(&pdev->dev, "regulators node not found\n"); } else { - ret = of_regulator_match(&pdev->dev, regulators, axp20x_matches, - ARRAY_SIZE(axp20x_matches)); - if (ret < 0) { - dev_err(&pdev->dev, "Error parsing regulator init data: %d\n", ret); - return ret; - } - dcdcfreq = 1500; of_property_read_u32(regulators, "x-powers,dcdc-freq", &dcdcfreq); ret = axp20x_set_dcdc_freq(pdev, dcdcfreq); @@ -233,23 +219,17 @@ static int axp20x_regulator_probe(struct platform_device *pdev) { struct regulator_dev *rdev; struct axp20x_dev *axp20x = dev_get_drvdata(pdev->dev.parent); - struct regulator_config config = { }; - struct regulator_init_data *init_data; + struct regulator_config config = { + .dev = pdev->dev.parent, + .regmap = axp20x->regmap, + }; int ret, i; u32 workmode; - ret = axp20x_regulator_parse_dt(pdev); - if (ret) - return ret; + /* This only sets the dcdc freq. Ignore any errors */ + axp20x_regulator_parse_dt(pdev); for (i = 0; i < AXP20X_REG_ID_MAX; i++) { - init_data = axp20x_matches[i].init_data; - - config.dev = pdev->dev.parent; - config.init_data = init_data; - config.regmap = axp20x->regmap; - config.of_node = axp20x_matches[i].of_node; - rdev = devm_regulator_register(&pdev->dev, &axp20x_regulators[i], &config); if (IS_ERR(rdev)) { @@ -259,7 +239,8 @@ static int axp20x_regulator_probe(struct platform_device *pdev) return PTR_ERR(rdev); } - ret = of_property_read_u32(axp20x_matches[i].of_node, "x-powers,dcdc-workmode", + ret = of_property_read_u32(rdev->dev.of_node, + "x-powers,dcdc-workmode", &workmode); if (!ret) { if (axp20x_set_dcdc_workmode(rdev, i, workmode)) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 9c48fb32f660..b899947d839d 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -632,49 +632,34 @@ static ssize_t regulator_bypass_show(struct device *dev, static DEVICE_ATTR(bypass, 0444, regulator_bypass_show, NULL); -/* - * These are the only attributes are present for all regulators. - * Other attributes are a function of regulator functionality. - */ -static struct attribute *regulator_dev_attrs[] = { - &dev_attr_name.attr, - &dev_attr_num_users.attr, - &dev_attr_type.attr, - NULL, -}; -ATTRIBUTE_GROUPS(regulator_dev); - -static void regulator_dev_release(struct device *dev) -{ - struct regulator_dev *rdev = dev_get_drvdata(dev); - kfree(rdev); -} - -static struct class regulator_class = { - .name = "regulator", - .dev_release = regulator_dev_release, - .dev_groups = regulator_dev_groups, -}; - /* Calculate the new optimum regulator operating mode based on the new total * consumer load. All locks held by caller */ -static void drms_uA_update(struct regulator_dev *rdev) +static int drms_uA_update(struct regulator_dev *rdev) { struct regulator *sibling; int current_uA = 0, output_uV, input_uV, err; unsigned int mode; + /* + * first check to see if we can set modes at all, otherwise just + * tell the consumer everything is OK. + */ err = regulator_check_drms(rdev); - if (err < 0 || !rdev->desc->ops->get_optimum_mode || - (!rdev->desc->ops->get_voltage && - !rdev->desc->ops->get_voltage_sel) || - !rdev->desc->ops->set_mode) - return; + if (err < 0) + return 0; + + if (!rdev->desc->ops->get_optimum_mode) + return 0; + + if (!rdev->desc->ops->set_mode) + return -EINVAL; /* get output voltage */ output_uV = _regulator_get_voltage(rdev); - if (output_uV <= 0) - return; + if (output_uV <= 0) { + rdev_err(rdev, "invalid output voltage found\n"); + return -EINVAL; + } /* get input voltage */ input_uV = 0; @@ -682,8 +667,10 @@ static void drms_uA_update(struct regulator_dev *rdev) input_uV = regulator_get_voltage(rdev->supply); if (input_uV <= 0) input_uV = rdev->constraints->input_uV; - if (input_uV <= 0) - return; + if (input_uV <= 0) { + rdev_err(rdev, "invalid input voltage found\n"); + return -EINVAL; + } /* calc total requested load */ list_for_each_entry(sibling, &rdev->consumer_list, list) @@ -695,8 +682,17 @@ static void drms_uA_update(struct regulator_dev *rdev) /* check the new mode is allowed */ err = regulator_mode_constrain(rdev, &mode); - if (err == 0) - rdev->desc->ops->set_mode(rdev, mode); + if (err < 0) { + rdev_err(rdev, "failed to get optimum mode @ %d uA %d -> %d uV\n", + current_uA, input_uV, output_uV); + return err; + } + + err = rdev->desc->ops->set_mode(rdev, mode); + if (err < 0) + rdev_err(rdev, "failed to set optimum mode %x\n", mode); + + return err; } static int suspend_set_state(struct regulator_dev *rdev, @@ -3026,75 +3022,13 @@ EXPORT_SYMBOL_GPL(regulator_get_mode); int regulator_set_optimum_mode(struct regulator *regulator, int uA_load) { struct regulator_dev *rdev = regulator->rdev; - struct regulator *consumer; - int ret, output_uV, input_uV = 0, total_uA_load = 0; - unsigned int mode; - - if (rdev->supply) - input_uV = regulator_get_voltage(rdev->supply); + int ret; mutex_lock(&rdev->mutex); - - /* - * first check to see if we can set modes at all, otherwise just - * tell the consumer everything is OK. - */ regulator->uA_load = uA_load; - ret = regulator_check_drms(rdev); - if (ret < 0) { - ret = 0; - goto out; - } - - if (!rdev->desc->ops->get_optimum_mode) - goto out; - - /* - * we can actually do this so any errors are indicators of - * potential real failure. - */ - ret = -EINVAL; - - if (!rdev->desc->ops->set_mode) - goto out; - - /* get output voltage */ - output_uV = _regulator_get_voltage(rdev); - if (output_uV <= 0) { - rdev_err(rdev, "invalid output voltage found\n"); - goto out; - } - - /* No supply? Use constraint voltage */ - if (input_uV <= 0) - input_uV = rdev->constraints->input_uV; - if (input_uV <= 0) { - rdev_err(rdev, "invalid input voltage found\n"); - goto out; - } - - /* calc total requested load for this regulator */ - list_for_each_entry(consumer, &rdev->consumer_list, list) - total_uA_load += consumer->uA_load; - - mode = rdev->desc->ops->get_optimum_mode(rdev, - input_uV, output_uV, - total_uA_load); - ret = regulator_mode_constrain(rdev, &mode); - if (ret < 0) { - rdev_err(rdev, "failed to get optimum mode @ %d uA %d -> %d uV\n", - total_uA_load, input_uV, output_uV); - goto out; - } - - ret = rdev->desc->ops->set_mode(rdev, mode); - if (ret < 0) { - rdev_err(rdev, "failed to set optimum mode %x\n", mode); - goto out; - } - ret = mode; -out: + ret = drms_uA_update(rdev); mutex_unlock(&rdev->mutex); + return ret; } EXPORT_SYMBOL_GPL(regulator_set_optimum_mode); @@ -3436,126 +3370,136 @@ int regulator_mode_to_status(unsigned int mode) } EXPORT_SYMBOL_GPL(regulator_mode_to_status); +static struct attribute *regulator_dev_attrs[] = { + &dev_attr_name.attr, + &dev_attr_num_users.attr, + &dev_attr_type.attr, + &dev_attr_microvolts.attr, + &dev_attr_microamps.attr, + &dev_attr_opmode.attr, + &dev_attr_state.attr, + &dev_attr_status.attr, + &dev_attr_bypass.attr, + &dev_attr_requested_microamps.attr, + &dev_attr_min_microvolts.attr, + &dev_attr_max_microvolts.attr, + &dev_attr_min_microamps.attr, + &dev_attr_max_microamps.attr, + &dev_attr_suspend_standby_state.attr, + &dev_attr_suspend_mem_state.attr, + &dev_attr_suspend_disk_state.attr, + &dev_attr_suspend_standby_microvolts.attr, + &dev_attr_suspend_mem_microvolts.attr, + &dev_attr_suspend_disk_microvolts.attr, + &dev_attr_suspend_standby_mode.attr, + &dev_attr_suspend_mem_mode.attr, + &dev_attr_suspend_disk_mode.attr, + NULL +}; + /* * To avoid cluttering sysfs (and memory) with useless state, only * create attributes that can be meaningfully displayed. */ -static int add_regulator_attributes(struct regulator_dev *rdev) +static umode_t regulator_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int idx) { - struct device *dev = &rdev->dev; + struct device *dev = kobj_to_dev(kobj); + struct regulator_dev *rdev = container_of(dev, struct regulator_dev, dev); const struct regulator_ops *ops = rdev->desc->ops; - int status = 0; + umode_t mode = attr->mode; + + /* these three are always present */ + if (attr == &dev_attr_name.attr || + attr == &dev_attr_num_users.attr || + attr == &dev_attr_type.attr) + return mode; /* some attributes need specific methods to be displayed */ - if ((ops->get_voltage && ops->get_voltage(rdev) >= 0) || - (ops->get_voltage_sel && ops->get_voltage_sel(rdev) >= 0) || - (ops->list_voltage && ops->list_voltage(rdev, 0) >= 0) || - (rdev->desc->fixed_uV && (rdev->desc->n_voltages == 1))) { - status = device_create_file(dev, &dev_attr_microvolts); - if (status < 0) - return status; - } - if (ops->get_current_limit) { - status = device_create_file(dev, &dev_attr_microamps); - if (status < 0) - return status; - } - if (ops->get_mode) { - status = device_create_file(dev, &dev_attr_opmode); - if (status < 0) - return status; - } - if (rdev->ena_pin || ops->is_enabled) { - status = device_create_file(dev, &dev_attr_state); - if (status < 0) - return status; - } - if (ops->get_status) { - status = device_create_file(dev, &dev_attr_status); - if (status < 0) - return status; - } - if (ops->get_bypass) { - status = device_create_file(dev, &dev_attr_bypass); - if (status < 0) - return status; + if (attr == &dev_attr_microvolts.attr) { + if ((ops->get_voltage && ops->get_voltage(rdev) >= 0) || + (ops->get_voltage_sel && ops->get_voltage_sel(rdev) >= 0) || + (ops->list_voltage && ops->list_voltage(rdev, 0) >= 0) || + (rdev->desc->fixed_uV && rdev->desc->n_voltages == 1)) + return mode; + return 0; } + if (attr == &dev_attr_microamps.attr) + return ops->get_current_limit ? mode : 0; + + if (attr == &dev_attr_opmode.attr) + return ops->get_mode ? mode : 0; + + if (attr == &dev_attr_state.attr) + return (rdev->ena_pin || ops->is_enabled) ? mode : 0; + + if (attr == &dev_attr_status.attr) + return ops->get_status ? mode : 0; + + if (attr == &dev_attr_bypass.attr) + return ops->get_bypass ? mode : 0; + /* some attributes are type-specific */ - if (rdev->desc->type == REGULATOR_CURRENT) { - status = device_create_file(dev, &dev_attr_requested_microamps); - if (status < 0) - return status; - } + if (attr == &dev_attr_requested_microamps.attr) + return rdev->desc->type == REGULATOR_CURRENT ? mode : 0; /* all the other attributes exist to support constraints; * don't show them if there are no constraints, or if the * relevant supporting methods are missing. */ if (!rdev->constraints) - return status; + return 0; /* constraints need specific supporting methods */ - if (ops->set_voltage || ops->set_voltage_sel) { - status = device_create_file(dev, &dev_attr_min_microvolts); - if (status < 0) - return status; - status = device_create_file(dev, &dev_attr_max_microvolts); - if (status < 0) - return status; - } - if (ops->set_current_limit) { - status = device_create_file(dev, &dev_attr_min_microamps); - if (status < 0) - return status; - status = device_create_file(dev, &dev_attr_max_microamps); - if (status < 0) - return status; - } - - status = device_create_file(dev, &dev_attr_suspend_standby_state); - if (status < 0) - return status; - status = device_create_file(dev, &dev_attr_suspend_mem_state); - if (status < 0) - return status; - status = device_create_file(dev, &dev_attr_suspend_disk_state); - if (status < 0) - return status; + if (attr == &dev_attr_min_microvolts.attr || + attr == &dev_attr_max_microvolts.attr) + return (ops->set_voltage || ops->set_voltage_sel) ? mode : 0; + + if (attr == &dev_attr_min_microamps.attr || + attr == &dev_attr_max_microamps.attr) + return ops->set_current_limit ? mode : 0; - if (ops->set_suspend_voltage) { - status = device_create_file(dev, - &dev_attr_suspend_standby_microvolts); - if (status < 0) - return status; - status = device_create_file(dev, - &dev_attr_suspend_mem_microvolts); - if (status < 0) - return status; - status = device_create_file(dev, - &dev_attr_suspend_disk_microvolts); - if (status < 0) - return status; - } - - if (ops->set_suspend_mode) { - status = device_create_file(dev, - &dev_attr_suspend_standby_mode); - if (status < 0) - return status; - status = device_create_file(dev, - &dev_attr_suspend_mem_mode); - if (status < 0) - return status; - status = device_create_file(dev, - &dev_attr_suspend_disk_mode); - if (status < 0) - return status; - } - - return status; + if (attr == &dev_attr_suspend_standby_state.attr || + attr == &dev_attr_suspend_mem_state.attr || + attr == &dev_attr_suspend_disk_state.attr) + return mode; + + if (attr == &dev_attr_suspend_standby_microvolts.attr || + attr == &dev_attr_suspend_mem_microvolts.attr || + attr == &dev_attr_suspend_disk_microvolts.attr) + return ops->set_suspend_voltage ? mode : 0; + + if (attr == &dev_attr_suspend_standby_mode.attr || + attr == &dev_attr_suspend_mem_mode.attr || + attr == &dev_attr_suspend_disk_mode.attr) + return ops->set_suspend_mode ? mode : 0; + + return mode; } +static const struct attribute_group regulator_dev_group = { + .attrs = regulator_dev_attrs, + .is_visible = regulator_attr_is_visible, +}; + +static const struct attribute_group *regulator_dev_groups[] = { + ®ulator_dev_group, + NULL +}; + +static void regulator_dev_release(struct device *dev) +{ + struct regulator_dev *rdev = dev_get_drvdata(dev); + kfree(rdev); +} + +static struct class regulator_class = { + .name = "regulator", + .dev_release = regulator_dev_release, + .dev_groups = regulator_dev_groups, +}; + static void rdev_init_debugfs(struct regulator_dev *rdev) { rdev->debugfs = debugfs_create_dir(rdev_get_name(rdev), debugfs_root); @@ -3575,7 +3519,7 @@ static void rdev_init_debugfs(struct regulator_dev *rdev) /** * regulator_register - register regulator * @regulator_desc: regulator to register - * @config: runtime configuration for regulator + * @cfg: runtime configuration for regulator * * Called by regulator drivers to register a regulator. * Returns a valid pointer to struct regulator_dev on success @@ -3583,20 +3527,21 @@ static void rdev_init_debugfs(struct regulator_dev *rdev) */ struct regulator_dev * regulator_register(const struct regulator_desc *regulator_desc, - const struct regulator_config *config) + const struct regulator_config *cfg) { const struct regulation_constraints *constraints = NULL; const struct regulator_init_data *init_data; - static atomic_t regulator_no = ATOMIC_INIT(0); + struct regulator_config *config = NULL; + static atomic_t regulator_no = ATOMIC_INIT(-1); struct regulator_dev *rdev; struct device *dev; int ret, i; const char *supply = NULL; - if (regulator_desc == NULL || config == NULL) + if (regulator_desc == NULL || cfg == NULL) return ERR_PTR(-EINVAL); - dev = config->dev; + dev = cfg->dev; WARN_ON(!dev); if (regulator_desc->name == NULL || regulator_desc->ops == NULL) @@ -3626,7 +3571,17 @@ regulator_register(const struct regulator_desc *regulator_desc, if (rdev == NULL) return ERR_PTR(-ENOMEM); - init_data = regulator_of_get_init_data(dev, regulator_desc, + /* + * Duplicate the config so the driver could override it after + * parsing init data. + */ + config = kmemdup(cfg, sizeof(*cfg), GFP_KERNEL); + if (config == NULL) { + kfree(rdev); + return ERR_PTR(-ENOMEM); + } + + init_data = regulator_of_get_init_data(dev, regulator_desc, config, &rdev->dev.of_node); if (!init_data) { init_data = config->init_data; @@ -3660,8 +3615,8 @@ regulator_register(const struct regulator_desc *regulator_desc, /* register with sysfs */ rdev->dev.class = ®ulator_class; rdev->dev.parent = dev; - dev_set_name(&rdev->dev, "regulator.%d", - atomic_inc_return(®ulator_no) - 1); + dev_set_name(&rdev->dev, "regulator.%lu", + (unsigned long) atomic_inc_return(®ulator_no)); ret = device_register(&rdev->dev); if (ret != 0) { put_device(&rdev->dev); @@ -3694,11 +3649,6 @@ regulator_register(const struct regulator_desc *regulator_desc, if (ret < 0) goto scrub; - /* add attributes supported by this regulator */ - ret = add_regulator_attributes(rdev); - if (ret < 0) - goto scrub; - if (init_data && init_data->supply_regulator) supply = init_data->supply_regulator; else if (regulator_desc->supply_name) @@ -3754,6 +3704,7 @@ add_dev: rdev_init_debugfs(rdev); out: mutex_unlock(®ulator_list_mutex); + kfree(config); return rdev; unset_supplies: diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c index c78d2106d6cb..01343419555e 100644 --- a/drivers/regulator/da9211-regulator.c +++ b/drivers/regulator/da9211-regulator.c @@ -24,6 +24,7 @@ #include <linux/regmap.h> #include <linux/irq.h> #include <linux/interrupt.h> +#include <linux/of_gpio.h> #include <linux/regulator/of_regulator.h> #include <linux/regulator/da9211.h> #include "da9211-regulator.h" @@ -276,7 +277,10 @@ static struct da9211_pdata *da9211_parse_regulators_dt( continue; pdata->init_data[n] = da9211_matches[i].init_data; - + pdata->reg_node[n] = da9211_matches[i].of_node; + pdata->gpio_ren[n] = + of_get_named_gpio(da9211_matches[i].of_node, + "enable-gpios", 0); n++; } @@ -364,7 +368,15 @@ static int da9211_regulator_init(struct da9211 *chip) config.dev = chip->dev; config.driver_data = chip; config.regmap = chip->regmap; - config.of_node = chip->dev->of_node; + config.of_node = chip->pdata->reg_node[i]; + + if (gpio_is_valid(chip->pdata->gpio_ren[i])) { + config.ena_gpio = chip->pdata->gpio_ren[i]; + config.ena_gpio_initialized = true; + } else { + config.ena_gpio = -EINVAL; + config.ena_gpio_initialized = false; + } chip->rdev[i] = devm_regulator_register(chip->dev, &da9211_regulators[i], &config); diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c index 6c43ab2d5121..3c25db89a021 100644 --- a/drivers/regulator/fan53555.c +++ b/drivers/regulator/fan53555.c @@ -147,7 +147,7 @@ static unsigned int fan53555_get_mode(struct regulator_dev *rdev) return REGULATOR_MODE_NORMAL; } -static int slew_rates[] = { +static const int slew_rates[] = { 64000, 32000, 16000, @@ -296,7 +296,7 @@ static int fan53555_regulator_register(struct fan53555_device_info *di, return PTR_ERR_OR_ZERO(di->rdev); } -static struct regmap_config fan53555_regmap_config = { +static const struct regmap_config fan53555_regmap_config = { .reg_bits = 8, .val_bits = 8, }; diff --git a/drivers/regulator/internal.h b/drivers/regulator/internal.h index 80ba2a35a04b..c74ac8734023 100644 --- a/drivers/regulator/internal.h +++ b/drivers/regulator/internal.h @@ -38,11 +38,13 @@ struct regulator { #ifdef CONFIG_OF struct regulator_init_data *regulator_of_get_init_data(struct device *dev, const struct regulator_desc *desc, + struct regulator_config *config, struct device_node **node); #else static inline struct regulator_init_data * regulator_of_get_init_data(struct device *dev, const struct regulator_desc *desc, + struct regulator_config *config, struct device_node **node) { return NULL; diff --git a/drivers/regulator/isl9305.c b/drivers/regulator/isl9305.c index 92fefd98da58..6e3a15fe00f1 100644 --- a/drivers/regulator/isl9305.c +++ b/drivers/regulator/isl9305.c @@ -177,8 +177,10 @@ static int isl9305_i2c_probe(struct i2c_client *i2c, #ifdef CONFIG_OF static const struct of_device_id isl9305_dt_ids[] = { - { .compatible = "isl,isl9305" }, - { .compatible = "isl,isl9305h" }, + { .compatible = "isl,isl9305" }, /* for backward compat., don't use */ + { .compatible = "isil,isl9305" }, + { .compatible = "isl,isl9305h" }, /* for backward compat., don't use */ + { .compatible = "isil,isl9305h" }, {}, }; #endif diff --git a/drivers/regulator/lp872x.c b/drivers/regulator/lp872x.c index 021d64d856bb..3de328ab41f3 100644 --- a/drivers/regulator/lp872x.c +++ b/drivers/regulator/lp872x.c @@ -106,7 +106,6 @@ struct lp872x { struct device *dev; enum lp872x_id chipid; struct lp872x_platform_data *pdata; - struct regulator_dev **regulators; int num_regulators; enum lp872x_dvs_state dvs_pin; int dvs_gpio; @@ -801,8 +800,6 @@ static int lp872x_regulator_register(struct lp872x *lp) dev_err(lp->dev, "regulator register err"); return PTR_ERR(rdev); } - - *(lp->regulators + i) = rdev; } return 0; @@ -906,7 +903,7 @@ static struct lp872x_platform_data static int lp872x_probe(struct i2c_client *cl, const struct i2c_device_id *id) { struct lp872x *lp; - int ret, size, num_regulators; + int ret; const int lp872x_num_regulators[] = { [LP8720] = LP8720_NUM_REGULATORS, [LP8725] = LP8725_NUM_REGULATORS, @@ -918,38 +915,27 @@ static int lp872x_probe(struct i2c_client *cl, const struct i2c_device_id *id) lp = devm_kzalloc(&cl->dev, sizeof(struct lp872x), GFP_KERNEL); if (!lp) - goto err_mem; - - num_regulators = lp872x_num_regulators[id->driver_data]; - size = sizeof(struct regulator_dev *) * num_regulators; + return -ENOMEM; - lp->regulators = devm_kzalloc(&cl->dev, size, GFP_KERNEL); - if (!lp->regulators) - goto err_mem; + lp->num_regulators = lp872x_num_regulators[id->driver_data]; lp->regmap = devm_regmap_init_i2c(cl, &lp872x_regmap_config); if (IS_ERR(lp->regmap)) { ret = PTR_ERR(lp->regmap); dev_err(&cl->dev, "regmap init i2c err: %d\n", ret); - goto err_dev; + return ret; } lp->dev = &cl->dev; lp->pdata = dev_get_platdata(&cl->dev); lp->chipid = id->driver_data; - lp->num_regulators = num_regulators; i2c_set_clientdata(cl, lp); ret = lp872x_config(lp); if (ret) - goto err_dev; + return ret; return lp872x_regulator_register(lp); - -err_mem: - return -ENOMEM; -err_dev: - return ret; } static const struct of_device_id lp872x_dt_ids[] = { diff --git a/drivers/regulator/max14577.c b/drivers/regulator/max14577.c index bf9a44c5fdd2..b3678d289619 100644 --- a/drivers/regulator/max14577.c +++ b/drivers/regulator/max14577.c @@ -103,6 +103,8 @@ static struct regulator_ops max14577_charger_ops = { static const struct regulator_desc max14577_supported_regulators[] = { [MAX14577_SAFEOUT] = { .name = "SAFEOUT", + .of_match = of_match_ptr("SAFEOUT"), + .regulators_node = of_match_ptr("regulators"), .id = MAX14577_SAFEOUT, .ops = &max14577_safeout_ops, .type = REGULATOR_VOLTAGE, @@ -114,6 +116,8 @@ static const struct regulator_desc max14577_supported_regulators[] = { }, [MAX14577_CHARGER] = { .name = "CHARGER", + .of_match = of_match_ptr("CHARGER"), + .regulators_node = of_match_ptr("regulators"), .id = MAX14577_CHARGER, .ops = &max14577_charger_ops, .type = REGULATOR_CURRENT, @@ -137,6 +141,8 @@ static struct regulator_ops max77836_ldo_ops = { static const struct regulator_desc max77836_supported_regulators[] = { [MAX14577_SAFEOUT] = { .name = "SAFEOUT", + .of_match = of_match_ptr("SAFEOUT"), + .regulators_node = of_match_ptr("regulators"), .id = MAX14577_SAFEOUT, .ops = &max14577_safeout_ops, .type = REGULATOR_VOLTAGE, @@ -148,6 +154,8 @@ static const struct regulator_desc max77836_supported_regulators[] = { }, [MAX14577_CHARGER] = { .name = "CHARGER", + .of_match = of_match_ptr("CHARGER"), + .regulators_node = of_match_ptr("regulators"), .id = MAX14577_CHARGER, .ops = &max14577_charger_ops, .type = REGULATOR_CURRENT, @@ -157,6 +165,8 @@ static const struct regulator_desc max77836_supported_regulators[] = { }, [MAX77836_LDO1] = { .name = "LDO1", + .of_match = of_match_ptr("LDO1"), + .regulators_node = of_match_ptr("regulators"), .id = MAX77836_LDO1, .ops = &max77836_ldo_ops, .type = REGULATOR_VOLTAGE, @@ -171,6 +181,8 @@ static const struct regulator_desc max77836_supported_regulators[] = { }, [MAX77836_LDO2] = { .name = "LDO2", + .of_match = of_match_ptr("LDO2"), + .regulators_node = of_match_ptr("regulators"), .id = MAX77836_LDO2, .ops = &max77836_ldo_ops, .type = REGULATOR_VOLTAGE, @@ -198,43 +210,6 @@ static struct of_regulator_match max77836_regulator_matches[] = { { .name = "LDO2", }, }; -static int max14577_regulator_dt_parse_pdata(struct platform_device *pdev, - enum maxim_device_type dev_type) -{ - int ret; - struct device_node *np; - struct of_regulator_match *regulator_matches; - unsigned int regulator_matches_size; - - np = of_get_child_by_name(pdev->dev.parent->of_node, "regulators"); - if (!np) { - dev_err(&pdev->dev, "Failed to get child OF node for regulators\n"); - return -EINVAL; - } - - switch (dev_type) { - case MAXIM_DEVICE_TYPE_MAX77836: - regulator_matches = max77836_regulator_matches; - regulator_matches_size = ARRAY_SIZE(max77836_regulator_matches); - break; - case MAXIM_DEVICE_TYPE_MAX14577: - default: - regulator_matches = max14577_regulator_matches; - regulator_matches_size = ARRAY_SIZE(max14577_regulator_matches); - } - - ret = of_regulator_match(&pdev->dev, np, regulator_matches, - regulator_matches_size); - if (ret < 0) - dev_err(&pdev->dev, "Error parsing regulator init data: %d\n", ret); - else - ret = 0; - - of_node_put(np); - - return ret; -} - static inline struct regulator_init_data *match_init_data(int index, enum maxim_device_type dev_type) { @@ -261,11 +236,6 @@ static inline struct device_node *match_of_node(int index, } } #else /* CONFIG_OF */ -static int max14577_regulator_dt_parse_pdata(struct platform_device *pdev, - enum maxim_device_type dev_type) -{ - return 0; -} static inline struct regulator_init_data *match_init_data(int index, enum maxim_device_type dev_type) { @@ -308,16 +278,12 @@ static int max14577_regulator_probe(struct platform_device *pdev) { struct max14577 *max14577 = dev_get_drvdata(pdev->dev.parent); struct max14577_platform_data *pdata = dev_get_platdata(max14577->dev); - int i, ret; + int i, ret = 0; struct regulator_config config = {}; const struct regulator_desc *supported_regulators; unsigned int supported_regulators_size; enum maxim_device_type dev_type = max14577->dev_type; - ret = max14577_regulator_dt_parse_pdata(pdev, dev_type); - if (ret) - return ret; - switch (dev_type) { case MAXIM_DEVICE_TYPE_MAX77836: supported_regulators = max77836_supported_regulators; @@ -329,7 +295,7 @@ static int max14577_regulator_probe(struct platform_device *pdev) supported_regulators_size = ARRAY_SIZE(max14577_supported_regulators); } - config.dev = &pdev->dev; + config.dev = max14577->dev; config.driver_data = max14577; for (i = 0; i < supported_regulators_size; i++) { diff --git a/drivers/regulator/max77686.c b/drivers/regulator/max77686.c index 10d206266ac2..15fb1416bfbd 100644 --- a/drivers/regulator/max77686.c +++ b/drivers/regulator/max77686.c @@ -26,6 +26,7 @@ #include <linux/bug.h> #include <linux/err.h> #include <linux/gpio.h> +#include <linux/of_gpio.h> #include <linux/slab.h> #include <linux/platform_device.h> #include <linux/regulator/driver.h> @@ -46,6 +47,11 @@ #define MAX77686_DVS_UVSTEP 12500 /* + * Value for configuring buck[89] and LDO{20,21,22} as GPIO control. + * It is the same as 'off' for other regulators. + */ +#define MAX77686_GPIO_CONTROL 0x0 +/* * Values used for configuring LDOs and bucks. * Forcing low power mode: LDO1, 3-5, 9, 13, 17-26 */ @@ -82,6 +88,8 @@ enum max77686_ramp_rate { }; struct max77686_data { + u64 gpio_enabled:MAX77686_REGULATORS; + /* Array indexed by regulator id */ unsigned int opmode[MAX77686_REGULATORS]; }; @@ -100,6 +108,26 @@ static unsigned int max77686_get_opmode_shift(int id) } } +/* + * When regulator is configured for GPIO control then it + * replaces "normal" mode. Any change from low power mode to normal + * should actually change to GPIO control. + * Map normal mode to proper value for such regulators. + */ +static unsigned int max77686_map_normal_mode(struct max77686_data *max77686, + int id) +{ + switch (id) { + case MAX77686_BUCK8: + case MAX77686_BUCK9: + case MAX77686_LDO20 ... MAX77686_LDO22: + if (max77686->gpio_enabled & (1 << id)) + return MAX77686_GPIO_CONTROL; + } + + return MAX77686_NORMAL; +} + /* Some BUCKs and LDOs supports Normal[ON/OFF] mode during suspend */ static int max77686_set_suspend_disable(struct regulator_dev *rdev) { @@ -136,7 +164,7 @@ static int max77686_set_suspend_mode(struct regulator_dev *rdev, val = MAX77686_LDO_LOWPOWER_PWRREQ; break; case REGULATOR_MODE_NORMAL: /* ON in Normal Mode */ - val = MAX77686_NORMAL; + val = max77686_map_normal_mode(max77686, id); break; default: pr_warn("%s: regulator_suspend_mode : 0x%x not supported\n", @@ -160,7 +188,7 @@ static int max77686_ldo_set_suspend_mode(struct regulator_dev *rdev, { unsigned int val; struct max77686_data *max77686 = rdev_get_drvdata(rdev); - int ret; + int ret, id = rdev_get_id(rdev); switch (mode) { case REGULATOR_MODE_STANDBY: /* switch off */ @@ -170,7 +198,7 @@ static int max77686_ldo_set_suspend_mode(struct regulator_dev *rdev, val = MAX77686_LDO_LOWPOWER_PWRREQ; break; case REGULATOR_MODE_NORMAL: /* ON in Normal Mode */ - val = MAX77686_NORMAL; + val = max77686_map_normal_mode(max77686, id); break; default: pr_warn("%s: regulator_suspend_mode : 0x%x not supported\n", @@ -184,7 +212,7 @@ static int max77686_ldo_set_suspend_mode(struct regulator_dev *rdev, if (ret) return ret; - max77686->opmode[rdev_get_id(rdev)] = val; + max77686->opmode[id] = val; return 0; } @@ -197,7 +225,7 @@ static int max77686_enable(struct regulator_dev *rdev) shift = max77686_get_opmode_shift(id); if (max77686->opmode[id] == MAX77686_OFF_PWRREQ) - max77686->opmode[id] = MAX77686_NORMAL; + max77686->opmode[id] = max77686_map_normal_mode(max77686, id); return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg, rdev->desc->enable_mask, @@ -229,6 +257,36 @@ static int max77686_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay) MAX77686_RAMP_RATE_MASK, ramp_value << 6); } +static int max77686_of_parse_cb(struct device_node *np, + const struct regulator_desc *desc, + struct regulator_config *config) +{ + struct max77686_data *max77686 = config->driver_data; + + switch (desc->id) { + case MAX77686_BUCK8: + case MAX77686_BUCK9: + case MAX77686_LDO20 ... MAX77686_LDO22: + config->ena_gpio = of_get_named_gpio(np, + "maxim,ena-gpios", 0); + config->ena_gpio_flags = GPIOF_OUT_INIT_HIGH; + config->ena_gpio_initialized = true; + break; + default: + return 0; + } + + if (gpio_is_valid(config->ena_gpio)) { + max77686->gpio_enabled |= (1 << desc->id); + + return regmap_update_bits(config->regmap, desc->enable_reg, + desc->enable_mask, + MAX77686_GPIO_CONTROL); + } + + return 0; +} + static struct regulator_ops max77686_ops = { .list_voltage = regulator_list_voltage_linear, .map_voltage = regulator_map_voltage_linear, @@ -283,6 +341,7 @@ static struct regulator_ops max77686_buck_dvs_ops = { .name = "LDO"#num, \ .of_match = of_match_ptr("LDO"#num), \ .regulators_node = of_match_ptr("voltage-regulators"), \ + .of_parse_cb = max77686_of_parse_cb, \ .id = MAX77686_LDO##num, \ .ops = &max77686_ops, \ .type = REGULATOR_VOLTAGE, \ @@ -355,6 +414,7 @@ static struct regulator_ops max77686_buck_dvs_ops = { .name = "BUCK"#num, \ .of_match = of_match_ptr("BUCK"#num), \ .regulators_node = of_match_ptr("voltage-regulators"), \ + .of_parse_cb = max77686_of_parse_cb, \ .id = MAX77686_BUCK##num, \ .ops = &max77686_ops, \ .type = REGULATOR_VOLTAGE, \ diff --git a/drivers/regulator/max77843.c b/drivers/regulator/max77843.c new file mode 100644 index 000000000000..c132ef527cdd --- /dev/null +++ b/drivers/regulator/max77843.c @@ -0,0 +1,227 @@ +/* + * max77843.c - Regulator driver for the Maxim MAX77843 + * + * Copyright (C) 2015 Samsung Electronics + * Author: Jaewon Kim <jaewon02.kim@samsung.com> + * Author: Beomho Seo <beomho.seo@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/regulator/driver.h> +#include <linux/regulator/machine.h> +#include <linux/mfd/max77843-private.h> +#include <linux/regulator/of_regulator.h> + +enum max77843_regulator_type { + MAX77843_SAFEOUT1 = 0, + MAX77843_SAFEOUT2, + MAX77843_CHARGER, + + MAX77843_NUM, +}; + +static const unsigned int max77843_safeout_voltage_table[] = { + 4850000, + 4900000, + 4950000, + 3300000, +}; + +static int max77843_reg_is_enabled(struct regulator_dev *rdev) +{ + struct regmap *regmap = rdev->regmap; + int ret; + unsigned int reg; + + ret = regmap_read(regmap, rdev->desc->enable_reg, ®); + if (ret) { + dev_err(&rdev->dev, "Fialed to read charger register\n"); + return ret; + } + + return (reg & rdev->desc->enable_mask) == rdev->desc->enable_mask; +} + +static int max77843_reg_get_current_limit(struct regulator_dev *rdev) +{ + struct regmap *regmap = rdev->regmap; + unsigned int chg_min_uA = rdev->constraints->min_uA; + unsigned int chg_max_uA = rdev->constraints->max_uA; + unsigned int val; + int ret; + unsigned int reg, sel; + + ret = regmap_read(regmap, MAX77843_CHG_REG_CHG_CNFG_02, ®); + if (ret) { + dev_err(&rdev->dev, "Failed to read charger register\n"); + return ret; + } + + sel = reg & MAX77843_CHG_FAST_CHG_CURRENT_MASK; + + if (sel < 0x03) + sel = 0; + else + sel -= 2; + + val = chg_min_uA + MAX77843_CHG_FAST_CHG_CURRENT_STEP * sel; + if (val > chg_max_uA) + return -EINVAL; + + return val; +} + +static int max77843_reg_set_current_limit(struct regulator_dev *rdev, + int min_uA, int max_uA) +{ + struct regmap *regmap = rdev->regmap; + unsigned int chg_min_uA = rdev->constraints->min_uA; + int sel = 0; + + while (chg_min_uA + MAX77843_CHG_FAST_CHG_CURRENT_STEP * sel < min_uA) + sel++; + + if (chg_min_uA + MAX77843_CHG_FAST_CHG_CURRENT_STEP * sel > max_uA) + return -EINVAL; + + sel += 2; + + return regmap_write(regmap, MAX77843_CHG_REG_CHG_CNFG_02, sel); +} + +static struct regulator_ops max77843_charger_ops = { + .is_enabled = max77843_reg_is_enabled, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .get_current_limit = max77843_reg_get_current_limit, + .set_current_limit = max77843_reg_set_current_limit, +}; + +static struct regulator_ops max77843_regulator_ops = { + .is_enabled = regulator_is_enabled_regmap, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .list_voltage = regulator_list_voltage_table, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_sel = regulator_set_voltage_sel_regmap, +}; + +static const struct regulator_desc max77843_supported_regulators[] = { + [MAX77843_SAFEOUT1] = { + .name = "SAFEOUT1", + .id = MAX77843_SAFEOUT1, + .ops = &max77843_regulator_ops, + .of_match = of_match_ptr("SAFEOUT1"), + .regulators_node = of_match_ptr("regulators"), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(max77843_safeout_voltage_table), + .volt_table = max77843_safeout_voltage_table, + .enable_reg = MAX77843_SYS_REG_SAFEOUTCTRL, + .enable_mask = MAX77843_REG_SAFEOUTCTRL_ENSAFEOUT1, + .vsel_reg = MAX77843_SYS_REG_SAFEOUTCTRL, + .vsel_mask = MAX77843_REG_SAFEOUTCTRL_SAFEOUT1_MASK, + }, + [MAX77843_SAFEOUT2] = { + .name = "SAFEOUT2", + .id = MAX77843_SAFEOUT2, + .ops = &max77843_regulator_ops, + .of_match = of_match_ptr("SAFEOUT2"), + .regulators_node = of_match_ptr("regulators"), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + .n_voltages = ARRAY_SIZE(max77843_safeout_voltage_table), + .volt_table = max77843_safeout_voltage_table, + .enable_reg = MAX77843_SYS_REG_SAFEOUTCTRL, + .enable_mask = MAX77843_REG_SAFEOUTCTRL_ENSAFEOUT2, + .vsel_reg = MAX77843_SYS_REG_SAFEOUTCTRL, + .vsel_mask = MAX77843_REG_SAFEOUTCTRL_SAFEOUT2_MASK, + }, + [MAX77843_CHARGER] = { + .name = "CHARGER", + .id = MAX77843_CHARGER, + .ops = &max77843_charger_ops, + .of_match = of_match_ptr("CHARGER"), + .regulators_node = of_match_ptr("regulators"), + .type = REGULATOR_CURRENT, + .owner = THIS_MODULE, + .enable_reg = MAX77843_CHG_REG_CHG_CNFG_00, + .enable_mask = MAX77843_CHG_MASK, + }, +}; + +static struct regmap *max77843_get_regmap(struct max77843 *max77843, int reg_id) +{ + switch (reg_id) { + case MAX77843_SAFEOUT1: + case MAX77843_SAFEOUT2: + return max77843->regmap; + case MAX77843_CHARGER: + return max77843->regmap_chg; + default: + return max77843->regmap; + } +} + +static int max77843_regulator_probe(struct platform_device *pdev) +{ + struct max77843 *max77843 = dev_get_drvdata(pdev->dev.parent); + struct regulator_config config = {}; + int i; + + config.dev = max77843->dev; + config.driver_data = max77843; + + for (i = 0; i < ARRAY_SIZE(max77843_supported_regulators); i++) { + struct regulator_dev *regulator; + + config.regmap = max77843_get_regmap(max77843, + max77843_supported_regulators[i].id); + + regulator = devm_regulator_register(&pdev->dev, + &max77843_supported_regulators[i], &config); + if (IS_ERR(regulator)) { + dev_err(&pdev->dev, + "Failed to regiser regulator-%d\n", i); + return PTR_ERR(regulator); + } + } + + return 0; +} + +static const struct platform_device_id max77843_regulator_id[] = { + { "max77843-regulator", }, + { /* sentinel */ }, +}; + +static struct platform_driver max77843_regulator_driver = { + .driver = { + .name = "max77843-regulator", + }, + .probe = max77843_regulator_probe, + .id_table = max77843_regulator_id, +}; + +static int __init max77843_regulator_init(void) +{ + return platform_driver_register(&max77843_regulator_driver); +} +subsys_initcall(max77843_regulator_init); + +static void __exit max77843_regulator_exit(void) +{ + platform_driver_unregister(&max77843_regulator_driver); +} +module_exit(max77843_regulator_exit); + +MODULE_AUTHOR("Jaewon Kim <jaewon02.kim@samsung.com>"); +MODULE_AUTHOR("Beomho Seo <beomho.seo@samsung.com>"); +MODULE_DESCRIPTION("Maxim MAX77843 regulator driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/regulator/max8649.c b/drivers/regulator/max8649.c index c8bddcc8f911..81229579ece9 100644 --- a/drivers/regulator/max8649.c +++ b/drivers/regulator/max8649.c @@ -115,7 +115,7 @@ static unsigned int max8649_get_mode(struct regulator_dev *rdev) return REGULATOR_MODE_NORMAL; } -static struct regulator_ops max8649_dcdc_ops = { +static const struct regulator_ops max8649_dcdc_ops = { .set_voltage_sel = regulator_set_voltage_sel_regmap, .get_voltage_sel = regulator_get_voltage_sel_regmap, .list_voltage = regulator_list_voltage_linear, @@ -143,7 +143,7 @@ static struct regulator_desc dcdc_desc = { .enable_is_inverted = true, }; -static struct regmap_config max8649_regmap_config = { +static const struct regmap_config max8649_regmap_config = { .reg_bits = 8, .val_bits = 8, }; diff --git a/drivers/regulator/mt6397-regulator.c b/drivers/regulator/mt6397-regulator.c new file mode 100644 index 000000000000..a5b2f4762677 --- /dev/null +++ b/drivers/regulator/mt6397-regulator.c @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2014 MediaTek Inc. + * Author: Flora Fu <flora.fu@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/mfd/mt6397/core.h> +#include <linux/mfd/mt6397/registers.h> +#include <linux/regulator/driver.h> +#include <linux/regulator/machine.h> +#include <linux/regulator/mt6397-regulator.h> +#include <linux/regulator/of_regulator.h> + +/* + * MT6397 regulators' information + * + * @desc: standard fields of regulator description. + * @qi: Mask for query enable signal status of regulators + * @vselon_reg: Register sections for hardware control mode of bucks + * @vselctrl_reg: Register for controlling the buck control mode. + * @vselctrl_mask: Mask for query buck's voltage control mode. + */ +struct mt6397_regulator_info { + struct regulator_desc desc; + u32 qi; + u32 vselon_reg; + u32 vselctrl_reg; + u32 vselctrl_mask; +}; + +#define MT6397_BUCK(match, vreg, min, max, step, volt_ranges, enreg, \ + vosel, vosel_mask, voselon, vosel_ctrl) \ +[MT6397_ID_##vreg] = { \ + .desc = { \ + .name = #vreg, \ + .of_match = of_match_ptr(match), \ + .ops = &mt6397_volt_range_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = MT6397_ID_##vreg, \ + .owner = THIS_MODULE, \ + .n_voltages = (max - min)/step + 1, \ + .linear_ranges = volt_ranges, \ + .n_linear_ranges = ARRAY_SIZE(volt_ranges), \ + .vsel_reg = vosel, \ + .vsel_mask = vosel_mask, \ + .enable_reg = enreg, \ + .enable_mask = BIT(0), \ + }, \ + .qi = BIT(13), \ + .vselon_reg = voselon, \ + .vselctrl_reg = vosel_ctrl, \ + .vselctrl_mask = BIT(1), \ +} + +#define MT6397_LDO(match, vreg, ldo_volt_table, enreg, enbit, vosel, \ + vosel_mask) \ +[MT6397_ID_##vreg] = { \ + .desc = { \ + .name = #vreg, \ + .of_match = of_match_ptr(match), \ + .ops = &mt6397_volt_table_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = MT6397_ID_##vreg, \ + .owner = THIS_MODULE, \ + .n_voltages = ARRAY_SIZE(ldo_volt_table), \ + .volt_table = ldo_volt_table, \ + .vsel_reg = vosel, \ + .vsel_mask = vosel_mask, \ + .enable_reg = enreg, \ + .enable_mask = BIT(enbit), \ + }, \ + .qi = BIT(15), \ +} + +#define MT6397_REG_FIXED(match, vreg, enreg, enbit, volt) \ +[MT6397_ID_##vreg] = { \ + .desc = { \ + .name = #vreg, \ + .of_match = of_match_ptr(match), \ + .ops = &mt6397_volt_fixed_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = MT6397_ID_##vreg, \ + .owner = THIS_MODULE, \ + .n_voltages = 1, \ + .enable_reg = enreg, \ + .enable_mask = BIT(enbit), \ + .min_uV = volt, \ + }, \ + .qi = BIT(15), \ +} + +static const struct regulator_linear_range buck_volt_range1[] = { + REGULATOR_LINEAR_RANGE(700000, 0, 0x7f, 6250), +}; + +static const struct regulator_linear_range buck_volt_range2[] = { + REGULATOR_LINEAR_RANGE(800000, 0, 0x7f, 6250), +}; + +static const struct regulator_linear_range buck_volt_range3[] = { + REGULATOR_LINEAR_RANGE(1500000, 0, 0x1f, 20000), +}; + +static const u32 ldo_volt_table1[] = { + 1500000, 1800000, 2500000, 2800000, +}; + +static const u32 ldo_volt_table2[] = { + 1800000, 3300000, +}; + +static const u32 ldo_volt_table3[] = { + 3000000, 3300000, +}; + +static const u32 ldo_volt_table4[] = { + 1220000, 1300000, 1500000, 1800000, 2500000, 2800000, 3000000, 3300000, +}; + +static const u32 ldo_volt_table5[] = { + 1200000, 1300000, 1500000, 1800000, 2500000, 2800000, 3000000, 3300000, +}; + +static const u32 ldo_volt_table5_v2[] = { + 1200000, 1000000, 1500000, 1800000, 2500000, 2800000, 3000000, 3300000, +}; + +static const u32 ldo_volt_table6[] = { + 1200000, 1300000, 1500000, 1800000, 2500000, 2800000, 3000000, 2000000, +}; + +static const u32 ldo_volt_table7[] = { + 1300000, 1500000, 1800000, 2000000, 2500000, 2800000, 3000000, 3300000, +}; + +static int mt6397_get_status(struct regulator_dev *rdev) +{ + int ret; + u32 regval; + struct mt6397_regulator_info *info = rdev_get_drvdata(rdev); + + ret = regmap_read(rdev->regmap, info->desc.enable_reg, ®val); + if (ret != 0) { + dev_err(&rdev->dev, "Failed to get enable reg: %d\n", ret); + return ret; + } + + return (regval & info->qi) ? REGULATOR_STATUS_ON : REGULATOR_STATUS_OFF; +} + +static struct regulator_ops mt6397_volt_range_ops = { + .list_voltage = regulator_list_voltage_linear_range, + .map_voltage = regulator_map_voltage_linear_range, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .get_status = mt6397_get_status, +}; + +static struct regulator_ops mt6397_volt_table_ops = { + .list_voltage = regulator_list_voltage_table, + .map_voltage = regulator_map_voltage_iterate, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .get_status = mt6397_get_status, +}; + +static struct regulator_ops mt6397_volt_fixed_ops = { + .list_voltage = regulator_list_voltage_linear, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .get_status = mt6397_get_status, +}; + +/* The array is indexed by id(MT6397_ID_XXX) */ +static struct mt6397_regulator_info mt6397_regulators[] = { + MT6397_BUCK("buck_vpca15", VPCA15, 700000, 1493750, 6250, + buck_volt_range1, MT6397_VCA15_CON7, MT6397_VCA15_CON9, 0x7f, + MT6397_VCA15_CON10, MT6397_VCA15_CON5), + MT6397_BUCK("buck_vpca7", VPCA7, 700000, 1493750, 6250, + buck_volt_range1, MT6397_VPCA7_CON7, MT6397_VPCA7_CON9, 0x7f, + MT6397_VPCA7_CON10, MT6397_VPCA7_CON5), + MT6397_BUCK("buck_vsramca15", VSRAMCA15, 700000, 1493750, 6250, + buck_volt_range1, MT6397_VSRMCA15_CON7, MT6397_VSRMCA15_CON9, + 0x7f, MT6397_VSRMCA15_CON10, MT6397_VSRMCA15_CON5), + MT6397_BUCK("buck_vsramca7", VSRAMCA7, 700000, 1493750, 6250, + buck_volt_range1, MT6397_VSRMCA7_CON7, MT6397_VSRMCA7_CON9, + 0x7f, MT6397_VSRMCA7_CON10, MT6397_VSRMCA7_CON5), + MT6397_BUCK("buck_vcore", VCORE, 700000, 1493750, 6250, + buck_volt_range1, MT6397_VCORE_CON7, MT6397_VCORE_CON9, 0x7f, + MT6397_VCORE_CON10, MT6397_VCORE_CON5), + MT6397_BUCK("buck_vgpu", VGPU, 700000, 1493750, 6250, buck_volt_range1, + MT6397_VGPU_CON7, MT6397_VGPU_CON9, 0x7f, + MT6397_VGPU_CON10, MT6397_VGPU_CON5), + MT6397_BUCK("buck_vdrm", VDRM, 800000, 1593750, 6250, buck_volt_range2, + MT6397_VDRM_CON7, MT6397_VDRM_CON9, 0x7f, + MT6397_VDRM_CON10, MT6397_VDRM_CON5), + MT6397_BUCK("buck_vio18", VIO18, 1500000, 2120000, 20000, + buck_volt_range3, MT6397_VIO18_CON7, MT6397_VIO18_CON9, 0x1f, + MT6397_VIO18_CON10, MT6397_VIO18_CON5), + MT6397_REG_FIXED("ldo_vtcxo", VTCXO, MT6397_ANALDO_CON0, 10, 2800000), + MT6397_REG_FIXED("ldo_va28", VA28, MT6397_ANALDO_CON1, 14, 2800000), + MT6397_LDO("ldo_vcama", VCAMA, ldo_volt_table1, + MT6397_ANALDO_CON2, 15, MT6397_ANALDO_CON6, 0xC0), + MT6397_REG_FIXED("ldo_vio28", VIO28, MT6397_DIGLDO_CON0, 14, 2800000), + MT6397_REG_FIXED("ldo_vusb", VUSB, MT6397_DIGLDO_CON1, 14, 3300000), + MT6397_LDO("ldo_vmc", VMC, ldo_volt_table2, + MT6397_DIGLDO_CON2, 12, MT6397_DIGLDO_CON29, 0x10), + MT6397_LDO("ldo_vmch", VMCH, ldo_volt_table3, + MT6397_DIGLDO_CON3, 14, MT6397_DIGLDO_CON17, 0x80), + MT6397_LDO("ldo_vemc3v3", VEMC3V3, ldo_volt_table3, + MT6397_DIGLDO_CON4, 14, MT6397_DIGLDO_CON18, 0x10), + MT6397_LDO("ldo_vgp1", VGP1, ldo_volt_table4, + MT6397_DIGLDO_CON5, 15, MT6397_DIGLDO_CON19, 0xE0), + MT6397_LDO("ldo_vgp2", VGP2, ldo_volt_table5, + MT6397_DIGLDO_CON6, 15, MT6397_DIGLDO_CON20, 0xE0), + MT6397_LDO("ldo_vgp3", VGP3, ldo_volt_table5, + MT6397_DIGLDO_CON7, 15, MT6397_DIGLDO_CON21, 0xE0), + MT6397_LDO("ldo_vgp4", VGP4, ldo_volt_table5, + MT6397_DIGLDO_CON8, 15, MT6397_DIGLDO_CON22, 0xE0), + MT6397_LDO("ldo_vgp5", VGP5, ldo_volt_table6, + MT6397_DIGLDO_CON9, 15, MT6397_DIGLDO_CON23, 0xE0), + MT6397_LDO("ldo_vgp6", VGP6, ldo_volt_table5, + MT6397_DIGLDO_CON10, 15, MT6397_DIGLDO_CON33, 0xE0), + MT6397_LDO("ldo_vibr", VIBR, ldo_volt_table7, + MT6397_DIGLDO_CON24, 15, MT6397_DIGLDO_CON25, 0xE00), +}; + +static int mt6397_set_buck_vosel_reg(struct platform_device *pdev) +{ + struct mt6397_chip *mt6397 = dev_get_drvdata(pdev->dev.parent); + int i; + u32 regval; + + for (i = 0; i < MT6397_MAX_REGULATOR; i++) { + if (mt6397_regulators[i].vselctrl_reg) { + if (regmap_read(mt6397->regmap, + mt6397_regulators[i].vselctrl_reg, + ®val) < 0) { + dev_err(&pdev->dev, + "Failed to read buck ctrl\n"); + return -EIO; + } + + if (regval & mt6397_regulators[i].vselctrl_mask) { + mt6397_regulators[i].desc.vsel_reg = + mt6397_regulators[i].vselon_reg; + } + } + } + + return 0; +} + +static int mt6397_regulator_probe(struct platform_device *pdev) +{ + struct mt6397_chip *mt6397 = dev_get_drvdata(pdev->dev.parent); + struct regulator_config config = {}; + struct regulator_dev *rdev; + int i; + u32 reg_value, version; + + /* Query buck controller to select activated voltage register part */ + if (mt6397_set_buck_vosel_reg(pdev)) + return -EIO; + + /* Read PMIC chip revision to update constraints and voltage table */ + if (regmap_read(mt6397->regmap, MT6397_CID, ®_value) < 0) { + dev_err(&pdev->dev, "Failed to read Chip ID\n"); + return -EIO; + } + dev_info(&pdev->dev, "Chip ID = 0x%x\n", reg_value); + + version = (reg_value & 0xFF); + switch (version) { + case MT6397_REGULATOR_ID91: + mt6397_regulators[MT6397_ID_VGP2].desc.volt_table = + ldo_volt_table5_v2; + break; + default: + break; + } + + for (i = 0; i < MT6397_MAX_REGULATOR; i++) { + config.dev = &pdev->dev; + config.driver_data = &mt6397_regulators[i]; + config.regmap = mt6397->regmap; + rdev = devm_regulator_register(&pdev->dev, + &mt6397_regulators[i].desc, &config); + if (IS_ERR(rdev)) { + dev_err(&pdev->dev, "failed to register %s\n", + mt6397_regulators[i].desc.name); + return PTR_ERR(rdev); + } + } + + return 0; +} + +static struct platform_driver mt6397_regulator_driver = { + .driver = { + .name = "mt6397-regulator", + }, + .probe = mt6397_regulator_probe, +}; + +module_platform_driver(mt6397_regulator_driver); + +MODULE_AUTHOR("Flora Fu <flora.fu@mediatek.com>"); +MODULE_DESCRIPTION("Regulator Driver for MediaTek MT6397 PMIC"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:mt6397-regulator"); diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index 91eaaf010524..24e812c48d93 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -270,6 +270,7 @@ EXPORT_SYMBOL_GPL(of_regulator_match); struct regulator_init_data *regulator_of_get_init_data(struct device *dev, const struct regulator_desc *desc, + struct regulator_config *config, struct device_node **node) { struct device_node *search, *child; @@ -307,6 +308,16 @@ struct regulator_init_data *regulator_of_get_init_data(struct device *dev, break; } + if (desc->of_parse_cb) { + if (desc->of_parse_cb(child, desc, config)) { + dev_err(dev, + "driver callback failed to parse DT for regulator %s\n", + child->name); + init_data = NULL; + break; + } + } + of_node_get(child); *node = child; break; diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index c879dff597ee..8cc8d1877c44 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -56,7 +56,7 @@ #define PFUZE100_VGEN5VOL 0x70 #define PFUZE100_VGEN6VOL 0x71 -enum chips { PFUZE100, PFUZE200 }; +enum chips { PFUZE100, PFUZE200, PFUZE3000 = 3 }; struct pfuze_regulator { struct regulator_desc desc; @@ -80,9 +80,18 @@ static const int pfuze100_vsnvs[] = { 1000000, 1100000, 1200000, 1300000, 1500000, 1800000, 3000000, }; +static const int pfuze3000_sw2lo[] = { + 1500000, 1550000, 1600000, 1650000, 1700000, 1750000, 1800000, 1850000, +}; + +static const int pfuze3000_sw2hi[] = { + 2500000, 2800000, 2850000, 3000000, 3100000, 3150000, 3200000, 3300000, +}; + static const struct i2c_device_id pfuze_device_id[] = { {.name = "pfuze100", .driver_data = PFUZE100}, {.name = "pfuze200", .driver_data = PFUZE200}, + {.name = "pfuze3000", .driver_data = PFUZE3000}, { } }; MODULE_DEVICE_TABLE(i2c, pfuze_device_id); @@ -90,6 +99,7 @@ MODULE_DEVICE_TABLE(i2c, pfuze_device_id); static const struct of_device_id pfuze_dt_ids[] = { { .compatible = "fsl,pfuze100", .data = (void *)PFUZE100}, { .compatible = "fsl,pfuze200", .data = (void *)PFUZE200}, + { .compatible = "fsl,pfuze3000", .data = (void *)PFUZE3000}, { } }; MODULE_DEVICE_TABLE(of, pfuze_dt_ids); @@ -219,6 +229,60 @@ static struct regulator_ops pfuze100_swb_regulator_ops = { .stby_mask = 0x20, \ } +#define PFUZE3000_VCC_REG(_chip, _name, base, min, max, step) { \ + .desc = { \ + .name = #_name, \ + .n_voltages = ((max) - (min)) / (step) + 1, \ + .ops = &pfuze100_ldo_regulator_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = _chip ## _ ## _name, \ + .owner = THIS_MODULE, \ + .min_uV = (min), \ + .uV_step = (step), \ + .vsel_reg = (base), \ + .vsel_mask = 0x3, \ + .enable_reg = (base), \ + .enable_mask = 0x10, \ + }, \ + .stby_reg = (base), \ + .stby_mask = 0x20, \ +} + + +#define PFUZE3000_SW2_REG(_chip, _name, base, min, max, step) { \ + .desc = { \ + .name = #_name,\ + .n_voltages = ((max) - (min)) / (step) + 1, \ + .ops = &pfuze100_sw_regulator_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = _chip ## _ ## _name, \ + .owner = THIS_MODULE, \ + .min_uV = (min), \ + .uV_step = (step), \ + .vsel_reg = (base) + PFUZE100_VOL_OFFSET, \ + .vsel_mask = 0x7, \ + }, \ + .stby_reg = (base) + PFUZE100_STANDBY_OFFSET, \ + .stby_mask = 0x7, \ +} + +#define PFUZE3000_SW3_REG(_chip, _name, base, min, max, step) { \ + .desc = { \ + .name = #_name,\ + .n_voltages = ((max) - (min)) / (step) + 1, \ + .ops = &pfuze100_sw_regulator_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = _chip ## _ ## _name, \ + .owner = THIS_MODULE, \ + .min_uV = (min), \ + .uV_step = (step), \ + .vsel_reg = (base) + PFUZE100_VOL_OFFSET, \ + .vsel_mask = 0xf, \ + }, \ + .stby_reg = (base) + PFUZE100_STANDBY_OFFSET, \ + .stby_mask = 0xf, \ +} + /* PFUZE100 */ static struct pfuze_regulator pfuze100_regulators[] = { PFUZE100_SW_REG(PFUZE100, SW1AB, PFUZE100_SW1ABVOL, 300000, 1875000, 25000), @@ -254,6 +318,22 @@ static struct pfuze_regulator pfuze200_regulators[] = { PFUZE100_VGEN_REG(PFUZE200, VGEN6, PFUZE100_VGEN6VOL, 1800000, 3300000, 100000), }; +static struct pfuze_regulator pfuze3000_regulators[] = { + PFUZE100_SW_REG(PFUZE3000, SW1A, PFUZE100_SW1ABVOL, 700000, 1475000, 25000), + PFUZE100_SW_REG(PFUZE3000, SW1B, PFUZE100_SW1CVOL, 700000, 1475000, 25000), + PFUZE100_SWB_REG(PFUZE3000, SW2, PFUZE100_SW2VOL, 0x7, pfuze3000_sw2lo), + PFUZE3000_SW3_REG(PFUZE3000, SW3, PFUZE100_SW3AVOL, 900000, 1650000, 50000), + PFUZE100_SWB_REG(PFUZE3000, SWBST, PFUZE100_SWBSTCON1, 0x3, pfuze100_swbst), + PFUZE100_SWB_REG(PFUZE3000, VSNVS, PFUZE100_VSNVSVOL, 0x7, pfuze100_vsnvs), + PFUZE100_FIXED_REG(PFUZE3000, VREFDDR, PFUZE100_VREFDDRCON, 750000), + PFUZE100_VGEN_REG(PFUZE3000, VLDO1, PFUZE100_VGEN1VOL, 1800000, 3300000, 100000), + PFUZE100_VGEN_REG(PFUZE3000, VLDO2, PFUZE100_VGEN2VOL, 800000, 1550000, 50000), + PFUZE3000_VCC_REG(PFUZE3000, VCCSD, PFUZE100_VGEN3VOL, 2850000, 3300000, 150000), + PFUZE3000_VCC_REG(PFUZE3000, V33, PFUZE100_VGEN4VOL, 2850000, 3300000, 150000), + PFUZE100_VGEN_REG(PFUZE3000, VLDO3, PFUZE100_VGEN5VOL, 1800000, 3300000, 100000), + PFUZE100_VGEN_REG(PFUZE3000, VLDO4, PFUZE100_VGEN6VOL, 1800000, 3300000, 100000), +}; + static struct pfuze_regulator *pfuze_regulators; #ifdef CONFIG_OF @@ -294,6 +374,24 @@ static struct of_regulator_match pfuze200_matches[] = { { .name = "vgen6", }, }; +/* PFUZE3000 */ +static struct of_regulator_match pfuze3000_matches[] = { + + { .name = "sw1a", }, + { .name = "sw1b", }, + { .name = "sw2", }, + { .name = "sw3", }, + { .name = "swbst", }, + { .name = "vsnvs", }, + { .name = "vrefddr", }, + { .name = "vldo1", }, + { .name = "vldo2", }, + { .name = "vccsd", }, + { .name = "v33", }, + { .name = "vldo3", }, + { .name = "vldo4", }, +}; + static struct of_regulator_match *pfuze_matches; static int pfuze_parse_regulators_dt(struct pfuze_chip *chip) @@ -313,6 +411,11 @@ static int pfuze_parse_regulators_dt(struct pfuze_chip *chip) } switch (chip->chip_id) { + case PFUZE3000: + pfuze_matches = pfuze3000_matches; + ret = of_regulator_match(dev, parent, pfuze3000_matches, + ARRAY_SIZE(pfuze3000_matches)); + break; case PFUZE200: pfuze_matches = pfuze200_matches; ret = of_regulator_match(dev, parent, pfuze200_matches, @@ -378,7 +481,8 @@ static int pfuze_identify(struct pfuze_chip *pfuze_chip) * as ID=8 in PFUZE100 */ dev_info(pfuze_chip->dev, "Assuming misprogrammed ID=0x8"); - } else if ((value & 0x0f) != pfuze_chip->chip_id) { + } else if ((value & 0x0f) != pfuze_chip->chip_id && + (value & 0xf0) >> 4 != pfuze_chip->chip_id) { /* device id NOT match with your setting */ dev_warn(pfuze_chip->dev, "Illegal ID: %x\n", value); return -ENODEV; @@ -417,7 +521,7 @@ static int pfuze100_regulator_probe(struct i2c_client *client, int i, ret; const struct of_device_id *match; u32 regulator_num; - u32 sw_check_start, sw_check_end; + u32 sw_check_start, sw_check_end, sw_hi = 0x40; pfuze_chip = devm_kzalloc(&client->dev, sizeof(*pfuze_chip), GFP_KERNEL); @@ -458,13 +562,19 @@ static int pfuze100_regulator_probe(struct i2c_client *client, /* use the right regulators after identify the right device */ switch (pfuze_chip->chip_id) { + case PFUZE3000: + pfuze_regulators = pfuze3000_regulators; + regulator_num = ARRAY_SIZE(pfuze3000_regulators); + sw_check_start = PFUZE3000_SW2; + sw_check_end = PFUZE3000_SW2; + sw_hi = 1 << 3; + break; case PFUZE200: pfuze_regulators = pfuze200_regulators; regulator_num = ARRAY_SIZE(pfuze200_regulators); sw_check_start = PFUZE200_SW2; sw_check_end = PFUZE200_SW3B; break; - case PFUZE100: default: pfuze_regulators = pfuze100_regulators; @@ -474,7 +584,8 @@ static int pfuze100_regulator_probe(struct i2c_client *client, break; } dev_info(&client->dev, "pfuze%s found.\n", - (pfuze_chip->chip_id == PFUZE100) ? "100" : "200"); + (pfuze_chip->chip_id == PFUZE100) ? "100" : + ((pfuze_chip->chip_id == PFUZE200) ? "200" : "3000")); memcpy(pfuze_chip->regulator_descs, pfuze_regulators, sizeof(pfuze_chip->regulator_descs)); @@ -498,10 +609,15 @@ static int pfuze100_regulator_probe(struct i2c_client *client, /* SW2~SW4 high bit check and modify the voltage value table */ if (i >= sw_check_start && i <= sw_check_end) { regmap_read(pfuze_chip->regmap, desc->vsel_reg, &val); - if (val & 0x40) { - desc->min_uV = 800000; - desc->uV_step = 50000; - desc->n_voltages = 51; + if (val & sw_hi) { + if (pfuze_chip->chip_id == PFUZE3000) { + desc->volt_table = pfuze3000_sw2hi; + desc->n_voltages = ARRAY_SIZE(pfuze3000_sw2hi); + } else { + desc->min_uV = 800000; + desc->uV_step = 50000; + desc->n_voltages = 51; + } } } diff --git a/drivers/regulator/qcom_rpm-regulator.c b/drivers/regulator/qcom_rpm-regulator.c index 8364ff331a81..e8647f7cf25e 100644 --- a/drivers/regulator/qcom_rpm-regulator.c +++ b/drivers/regulator/qcom_rpm-regulator.c @@ -227,9 +227,11 @@ static int rpm_reg_set_mV_sel(struct regulator_dev *rdev, return uV; mutex_lock(&vreg->lock); - vreg->uV = uV; if (vreg->is_enabled) - ret = rpm_reg_write(vreg, req, vreg->uV / 1000); + ret = rpm_reg_write(vreg, req, uV / 1000); + + if (!ret) + vreg->uV = uV; mutex_unlock(&vreg->lock); return ret; @@ -252,9 +254,11 @@ static int rpm_reg_set_uV_sel(struct regulator_dev *rdev, return uV; mutex_lock(&vreg->lock); - vreg->uV = uV; if (vreg->is_enabled) - ret = rpm_reg_write(vreg, req, vreg->uV); + ret = rpm_reg_write(vreg, req, uV); + + if (!ret) + vreg->uV = uV; mutex_unlock(&vreg->lock); return ret; @@ -674,6 +678,7 @@ static int rpm_reg_probe(struct platform_device *pdev) vreg->desc.owner = THIS_MODULE; vreg->desc.type = REGULATOR_VOLTAGE; vreg->desc.name = pdev->dev.of_node->name; + vreg->desc.supply_name = "vin"; vreg->rpm = dev_get_drvdata(pdev->dev.parent); if (!vreg->rpm) { @@ -768,7 +773,7 @@ static int rpm_reg_probe(struct platform_device *pdev) break; } - if (force_mode < 0) { + if (force_mode == -1) { dev_err(&pdev->dev, "invalid force mode\n"); return -EINVAL; } diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c index c94a3e0f3b91..1f93b752a81c 100644 --- a/drivers/regulator/rk808-regulator.c +++ b/drivers/regulator/rk808-regulator.c @@ -97,7 +97,7 @@ static int rk808_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay) RK808_RAMP_RATE_MASK, ramp_value); } -int rk808_set_suspend_voltage(struct regulator_dev *rdev, int uv) +static int rk808_set_suspend_voltage(struct regulator_dev *rdev, int uv) { unsigned int reg; int sel = regulator_map_voltage_linear_range(rdev, uv, uv); @@ -112,7 +112,7 @@ int rk808_set_suspend_voltage(struct regulator_dev *rdev, int uv) sel); } -int rk808_set_suspend_enable(struct regulator_dev *rdev) +static int rk808_set_suspend_enable(struct regulator_dev *rdev) { unsigned int reg; @@ -123,7 +123,7 @@ int rk808_set_suspend_enable(struct regulator_dev *rdev) 0); } -int rk808_set_suspend_disable(struct regulator_dev *rdev) +static int rk808_set_suspend_disable(struct regulator_dev *rdev) { unsigned int reg; diff --git a/drivers/regulator/rt5033-regulator.c b/drivers/regulator/rt5033-regulator.c index 870cc49438db..96d2c18e051a 100644 --- a/drivers/regulator/rt5033-regulator.c +++ b/drivers/regulator/rt5033-regulator.c @@ -36,6 +36,8 @@ static struct regulator_ops rt5033_buck_ops = { static const struct regulator_desc rt5033_supported_regulators[] = { [RT5033_BUCK] = { .name = "BUCK", + .of_match = of_match_ptr("BUCK"), + .regulators_node = of_match_ptr("regulators"), .id = RT5033_BUCK, .ops = &rt5033_buck_ops, .type = REGULATOR_VOLTAGE, @@ -50,6 +52,8 @@ static const struct regulator_desc rt5033_supported_regulators[] = { }, [RT5033_LDO] = { .name = "LDO", + .of_match = of_match_ptr("LDO"), + .regulators_node = of_match_ptr("regulators"), .id = RT5033_LDO, .ops = &rt5033_buck_ops, .type = REGULATOR_VOLTAGE, @@ -64,6 +68,8 @@ static const struct regulator_desc rt5033_supported_regulators[] = { }, [RT5033_SAFE_LDO] = { .name = "SAFE_LDO", + .of_match = of_match_ptr("SAFE_LDO"), + .regulators_node = of_match_ptr("regulators"), .id = RT5033_SAFE_LDO, .ops = &rt5033_safe_ldo_ops, .type = REGULATOR_VOLTAGE, @@ -81,7 +87,7 @@ static int rt5033_regulator_probe(struct platform_device *pdev) int ret, i; struct regulator_config config = {}; - config.dev = &pdev->dev; + config.dev = rt5033->dev; config.driver_data = rt5033; for (i = 0; i < ARRAY_SIZE(rt5033_supported_regulators); i++) { diff --git a/drivers/regulator/tps65023-regulator.c b/drivers/regulator/tps65023-regulator.c index 7380af8bd50d..b941e564b3f3 100644 --- a/drivers/regulator/tps65023-regulator.c +++ b/drivers/regulator/tps65023-regulator.c @@ -173,7 +173,7 @@ static int tps65023_dcdc_set_voltage_sel(struct regulator_dev *dev, } /* Operations permitted on VDCDCx */ -static struct regulator_ops tps65023_dcdc_ops = { +static const struct regulator_ops tps65023_dcdc_ops = { .is_enabled = regulator_is_enabled_regmap, .enable = regulator_enable_regmap, .disable = regulator_disable_regmap, @@ -184,7 +184,7 @@ static struct regulator_ops tps65023_dcdc_ops = { }; /* Operations permitted on LDOx */ -static struct regulator_ops tps65023_ldo_ops = { +static const struct regulator_ops tps65023_ldo_ops = { .is_enabled = regulator_is_enabled_regmap, .enable = regulator_enable_regmap, .disable = regulator_disable_regmap, @@ -194,7 +194,7 @@ static struct regulator_ops tps65023_ldo_ops = { .map_voltage = regulator_map_voltage_ascend, }; -static struct regmap_config tps65023_regmap_config = { +static const struct regmap_config tps65023_regmap_config = { .reg_bits = 8, .val_bits = 8, }; diff --git a/drivers/rtc/hctosys.c b/drivers/rtc/hctosys.c index 4aa60d74004e..6c719f23520a 100644 --- a/drivers/rtc/hctosys.c +++ b/drivers/rtc/hctosys.c @@ -26,7 +26,7 @@ static int __init rtc_hctosys(void) { int err = -ENODEV; struct rtc_time tm; - struct timespec tv = { + struct timespec64 tv64 = { .tv_nsec = NSEC_PER_SEC >> 1, }; struct rtc_device *rtc = rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE); @@ -45,25 +45,17 @@ static int __init rtc_hctosys(void) } - err = rtc_valid_tm(&tm); - if (err) { - dev_err(rtc->dev.parent, - "hctosys: invalid date/time\n"); - goto err_invalid; - } - - rtc_tm_to_time(&tm, &tv.tv_sec); + tv64.tv_sec = rtc_tm_to_time64(&tm); - err = do_settimeofday(&tv); + err = do_settimeofday64(&tv64); dev_info(rtc->dev.parent, "setting system clock to " - "%d-%02d-%02d %02d:%02d:%02d UTC (%u)\n", + "%d-%02d-%02d %02d:%02d:%02d UTC (%lld)\n", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, - (unsigned int) tv.tv_sec); + (long long) tv64.tv_sec); -err_invalid: err_read: rtc_class_close(rtc); diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 45bfc28ee3aa..37215cf983e9 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -73,10 +73,8 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm) else if (rtc->ops->set_time) err = rtc->ops->set_time(rtc->dev.parent, tm); else if (rtc->ops->set_mmss) { - unsigned long secs; - err = rtc_tm_to_time(tm, &secs); - if (err == 0) - err = rtc->ops->set_mmss(rtc->dev.parent, secs); + time64_t secs64 = rtc_tm_to_time64(tm); + err = rtc->ops->set_mmss(rtc->dev.parent, secs64); } else err = -EINVAL; @@ -105,7 +103,7 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs) err = rtc->ops->read_time(rtc->dev.parent, &old); if (err == 0) { - rtc_time_to_tm(secs, &new); + rtc_time64_to_tm(secs, &new); /* * avoid writing when we're going to change the day of @@ -157,7 +155,7 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) int err; struct rtc_time before, now; int first_time = 1; - unsigned long t_now, t_alm; + time64_t t_now, t_alm; enum { none, day, month, year } missing = none; unsigned days; @@ -258,8 +256,8 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) } /* with luck, no rollover is needed */ - rtc_tm_to_time(&now, &t_now); - rtc_tm_to_time(&alarm->time, &t_alm); + t_now = rtc_tm_to_time64(&now); + t_alm = rtc_tm_to_time64(&alarm->time); if (t_now < t_alm) goto done; @@ -273,7 +271,7 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) case day: dev_dbg(&rtc->dev, "alarm rollover: %s\n", "day"); t_alm += 24 * 60 * 60; - rtc_time_to_tm(t_alm, &alarm->time); + rtc_time64_to_tm(t_alm, &alarm->time); break; /* Month rollover ... if it's the 31th, an alarm on the 3rd will @@ -346,19 +344,19 @@ EXPORT_SYMBOL_GPL(rtc_read_alarm); static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { struct rtc_time tm; - long now, scheduled; + time64_t now, scheduled; int err; err = rtc_valid_tm(&alarm->time); if (err) return err; - rtc_tm_to_time(&alarm->time, &scheduled); + scheduled = rtc_tm_to_time64(&alarm->time); /* Make sure we're not setting alarms in the past */ err = __rtc_read_time(rtc, &tm); if (err) return err; - rtc_tm_to_time(&tm, &now); + now = rtc_tm_to_time64(&tm); if (scheduled <= now) return -ETIME; /* diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c index d04939369251..799c34bcb26f 100644 --- a/drivers/rtc/rtc-dev.c +++ b/drivers/rtc/rtc-dev.c @@ -304,12 +304,12 @@ static long rtc_dev_ioctl(struct file *file, * Not supported here. */ { - unsigned long now, then; + time64_t now, then; err = rtc_read_time(rtc, &tm); if (err < 0) return err; - rtc_tm_to_time(&tm, &now); + now = rtc_tm_to_time64(&tm); alarm.time.tm_mday = tm.tm_mday; alarm.time.tm_mon = tm.tm_mon; @@ -317,11 +317,11 @@ static long rtc_dev_ioctl(struct file *file, err = rtc_valid_tm(&alarm.time); if (err < 0) return err; - rtc_tm_to_time(&alarm.time, &then); + then = rtc_tm_to_time64(&alarm.time); /* alarm may need to wrap into tomorrow */ if (then < now) { - rtc_time_to_tm(now + 24 * 60 * 60, &tm); + rtc_time64_to_tm(now + 24 * 60 * 60, &tm); alarm.time.tm_mday = tm.tm_mday; alarm.time.tm_mon = tm.tm_mon; alarm.time.tm_year = tm.tm_year; diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c index b37b0c80bd5a..cb989cd00b14 100644 --- a/drivers/rtc/rtc-efi.c +++ b/drivers/rtc/rtc-efi.c @@ -218,6 +218,7 @@ static int __init efi_rtc_probe(struct platform_device *dev) if (IS_ERR(rtc)) return PTR_ERR(rtc); + rtc->uie_unsupported = 1; platform_set_drvdata(dev, rtc); return 0; diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c index bf3e242ccc5c..eb71872d0361 100644 --- a/drivers/rtc/systohc.c +++ b/drivers/rtc/systohc.c @@ -20,16 +20,16 @@ * * If temporary failure is indicated the caller should try again 'soon' */ -int rtc_set_ntp_time(struct timespec now) +int rtc_set_ntp_time(struct timespec64 now) { struct rtc_device *rtc; struct rtc_time tm; int err = -ENODEV; if (now.tv_nsec < (NSEC_PER_SEC >> 1)) - rtc_time_to_tm(now.tv_sec, &tm); + rtc_time64_to_tm(now.tv_sec, &tm); else - rtc_time_to_tm(now.tv_sec + 1, &tm); + rtc_time64_to_tm(now.tv_sec + 1, &tm); rtc = rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE); if (rtc) { diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 99829985c1a1..95ccedabba4f 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -185,6 +185,16 @@ config SPI_DAVINCI help SPI master controller for DaVinci/DA8x/OMAP-L/AM1x SPI modules. +config SPI_DLN2 + tristate "Diolan DLN-2 USB SPI adapter" + depends on MFD_DLN2 + help + If you say yes to this option, support will be included for Diolan + DLN2, a USB to SPI interface. + + This driver can also be built as a module. If so, the module + will be called spi-dln2. + config SPI_EFM32 tristate "EFM32 SPI controller" depends on OF && ARM && (ARCH_EFM32 || COMPILE_TEST) @@ -279,7 +289,7 @@ config SPI_FSL_CPM depends on FSL_SOC config SPI_FSL_SPI - bool "Freescale SPI controller and Aeroflex Gaisler GRLIB SPI controller" + tristate "Freescale SPI controller and Aeroflex Gaisler GRLIB SPI controller" depends on OF select SPI_FSL_LIB select SPI_FSL_CPM if FSL_SOC @@ -292,7 +302,6 @@ config SPI_FSL_SPI config SPI_FSL_DSPI tristate "Freescale DSPI controller" - select SPI_BITBANG select REGMAP_MMIO depends on SOC_VF610 || COMPILE_TEST help @@ -300,7 +309,7 @@ config SPI_FSL_DSPI mode. VF610 platform uses the controller. config SPI_FSL_ESPI - bool "Freescale eSPI controller" + tristate "Freescale eSPI controller" depends on FSL_SOC select SPI_FSL_LIB help @@ -460,7 +469,6 @@ config SPI_S3C24XX_FIQ config SPI_S3C64XX tristate "Samsung S3C64XX series type SPI" depends on (PLAT_SAMSUNG || ARCH_EXYNOS) - select S3C64XX_PL080 if ARCH_S3C64XX help SPI driver for Samsung S3C64XX and newer SoCs. @@ -503,6 +511,13 @@ config SPI_SIRF help SPI driver for CSR SiRFprimaII SoCs +config SPI_ST_SSC4 + tristate "STMicroelectronics SPI SSC-based driver" + depends on ARCH_STI + help + STMicroelectronics SoCs support for SPI. If you say yes to + this option, support will be included for the SSC driven SPI. + config SPI_SUN4I tristate "Allwinner A10 SoCs SPI controller" depends on ARCH_SUNXI || COMPILE_TEST @@ -595,7 +610,6 @@ config SPI_XTENSA_XTFPGA 16 bit words in SPI mode 0, automatically asserting CS on transfer start and deasserting on end. - config SPI_NUC900 tristate "Nuvoton NUC900 series SPI" depends on ARCH_W90X900 diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index 6b9d2ac629cc..d8cbf654976b 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_SPI_CADENCE) += spi-cadence.o obj-$(CONFIG_SPI_CLPS711X) += spi-clps711x.o obj-$(CONFIG_SPI_COLDFIRE_QSPI) += spi-coldfire-qspi.o obj-$(CONFIG_SPI_DAVINCI) += spi-davinci.o +obj-$(CONFIG_SPI_DLN2) += spi-dln2.o obj-$(CONFIG_SPI_DESIGNWARE) += spi-dw.o obj-$(CONFIG_SPI_DW_MMIO) += spi-dw-mmio.o obj-$(CONFIG_SPI_DW_PCI) += spi-dw-midpci.o @@ -76,6 +77,7 @@ obj-$(CONFIG_SPI_SH_HSPI) += spi-sh-hspi.o obj-$(CONFIG_SPI_SH_MSIOF) += spi-sh-msiof.o obj-$(CONFIG_SPI_SH_SCI) += spi-sh-sci.o obj-$(CONFIG_SPI_SIRF) += spi-sirf.o +obj-$(CONFIG_SPI_ST_SSC4) += spi-st-ssc4.o obj-$(CONFIG_SPI_SUN4I) += spi-sun4i.o obj-$(CONFIG_SPI_SUN6I) += spi-sun6i.o obj-$(CONFIG_SPI_TEGRA114) += spi-tegra114.o diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 23d8f5f56579..9af7841f2e8c 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -1046,6 +1046,7 @@ static int atmel_spi_one_transfer(struct spi_master *master, struct atmel_spi_device *asd; int timeout; int ret; + unsigned long dma_timeout; as = spi_master_get_devdata(master); @@ -1103,15 +1104,12 @@ static int atmel_spi_one_transfer(struct spi_master *master, /* interrupts are disabled, so free the lock for schedule */ atmel_spi_unlock(as); - ret = wait_for_completion_timeout(&as->xfer_completion, - SPI_DMA_TIMEOUT); + dma_timeout = wait_for_completion_timeout(&as->xfer_completion, + SPI_DMA_TIMEOUT); atmel_spi_lock(as); - if (WARN_ON(ret == 0)) { - dev_err(&spi->dev, - "spi trasfer timeout, err %d\n", ret); + if (WARN_ON(dma_timeout == 0)) { + dev_err(&spi->dev, "spi transfer timeout\n"); as->done_status = -EIO; - } else { - ret = 0; } if (as->done_status) diff --git a/drivers/spi/spi-au1550.c b/drivers/spi/spi-au1550.c index 326f47973684..f45e085c01a6 100644 --- a/drivers/spi/spi-au1550.c +++ b/drivers/spi/spi-au1550.c @@ -15,10 +15,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/init.h> diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 98aab457b24d..419a782ab6d5 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -17,10 +17,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include <linux/clk.h> diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c index c20530982e26..e73e2b052c9c 100644 --- a/drivers/spi/spi-bcm63xx.c +++ b/drivers/spi/spi-bcm63xx.c @@ -13,10 +13,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, */ #include <linux/kernel.h> diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c index dc7d2c2d643e..5ef6638d5e8a 100644 --- a/drivers/spi/spi-bitbang.c +++ b/drivers/spi/spi-bitbang.c @@ -10,10 +10,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/spinlock.h> diff --git a/drivers/spi/spi-butterfly.c b/drivers/spi/spi-butterfly.c index ee4f91ccd8fd..9a95862986c8 100644 --- a/drivers/spi/spi-butterfly.c +++ b/drivers/spi/spi-butterfly.c @@ -12,10 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/kernel.h> #include <linux/init.h> diff --git a/drivers/spi/spi-coldfire-qspi.c b/drivers/spi/spi-coldfire-qspi.c index 41b5dc4445f6..688956ff5095 100644 --- a/drivers/spi/spi-coldfire-qspi.c +++ b/drivers/spi/spi-coldfire-qspi.c @@ -12,11 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA - * */ #include <linux/kernel.h> diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c index b3707badb1e5..5e991065f5b0 100644 --- a/drivers/spi/spi-davinci.c +++ b/drivers/spi/spi-davinci.c @@ -11,10 +11,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/interrupt.h> diff --git a/drivers/spi/spi-dln2.c b/drivers/spi/spi-dln2.c new file mode 100644 index 000000000000..3b7d91d94fea --- /dev/null +++ b/drivers/spi/spi-dln2.c @@ -0,0 +1,881 @@ +/* + * Driver for the Diolan DLN-2 USB-SPI adapter + * + * Copyright (c) 2014 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/mfd/dln2.h> +#include <linux/spi/spi.h> +#include <linux/pm_runtime.h> +#include <asm/unaligned.h> + +#define DLN2_SPI_MODULE_ID 0x02 +#define DLN2_SPI_CMD(cmd) DLN2_CMD(cmd, DLN2_SPI_MODULE_ID) + +/* SPI commands */ +#define DLN2_SPI_GET_PORT_COUNT DLN2_SPI_CMD(0x00) +#define DLN2_SPI_ENABLE DLN2_SPI_CMD(0x11) +#define DLN2_SPI_DISABLE DLN2_SPI_CMD(0x12) +#define DLN2_SPI_IS_ENABLED DLN2_SPI_CMD(0x13) +#define DLN2_SPI_SET_MODE DLN2_SPI_CMD(0x14) +#define DLN2_SPI_GET_MODE DLN2_SPI_CMD(0x15) +#define DLN2_SPI_SET_FRAME_SIZE DLN2_SPI_CMD(0x16) +#define DLN2_SPI_GET_FRAME_SIZE DLN2_SPI_CMD(0x17) +#define DLN2_SPI_SET_FREQUENCY DLN2_SPI_CMD(0x18) +#define DLN2_SPI_GET_FREQUENCY DLN2_SPI_CMD(0x19) +#define DLN2_SPI_READ_WRITE DLN2_SPI_CMD(0x1A) +#define DLN2_SPI_READ DLN2_SPI_CMD(0x1B) +#define DLN2_SPI_WRITE DLN2_SPI_CMD(0x1C) +#define DLN2_SPI_SET_DELAY_BETWEEN_SS DLN2_SPI_CMD(0x20) +#define DLN2_SPI_GET_DELAY_BETWEEN_SS DLN2_SPI_CMD(0x21) +#define DLN2_SPI_SET_DELAY_AFTER_SS DLN2_SPI_CMD(0x22) +#define DLN2_SPI_GET_DELAY_AFTER_SS DLN2_SPI_CMD(0x23) +#define DLN2_SPI_SET_DELAY_BETWEEN_FRAMES DLN2_SPI_CMD(0x24) +#define DLN2_SPI_GET_DELAY_BETWEEN_FRAMES DLN2_SPI_CMD(0x25) +#define DLN2_SPI_SET_SS DLN2_SPI_CMD(0x26) +#define DLN2_SPI_GET_SS DLN2_SPI_CMD(0x27) +#define DLN2_SPI_RELEASE_SS DLN2_SPI_CMD(0x28) +#define DLN2_SPI_SS_VARIABLE_ENABLE DLN2_SPI_CMD(0x2B) +#define DLN2_SPI_SS_VARIABLE_DISABLE DLN2_SPI_CMD(0x2C) +#define DLN2_SPI_SS_VARIABLE_IS_ENABLED DLN2_SPI_CMD(0x2D) +#define DLN2_SPI_SS_AAT_ENABLE DLN2_SPI_CMD(0x2E) +#define DLN2_SPI_SS_AAT_DISABLE DLN2_SPI_CMD(0x2F) +#define DLN2_SPI_SS_AAT_IS_ENABLED DLN2_SPI_CMD(0x30) +#define DLN2_SPI_SS_BETWEEN_FRAMES_ENABLE DLN2_SPI_CMD(0x31) +#define DLN2_SPI_SS_BETWEEN_FRAMES_DISABLE DLN2_SPI_CMD(0x32) +#define DLN2_SPI_SS_BETWEEN_FRAMES_IS_ENABLED DLN2_SPI_CMD(0x33) +#define DLN2_SPI_SET_CPHA DLN2_SPI_CMD(0x34) +#define DLN2_SPI_GET_CPHA DLN2_SPI_CMD(0x35) +#define DLN2_SPI_SET_CPOL DLN2_SPI_CMD(0x36) +#define DLN2_SPI_GET_CPOL DLN2_SPI_CMD(0x37) +#define DLN2_SPI_SS_MULTI_ENABLE DLN2_SPI_CMD(0x38) +#define DLN2_SPI_SS_MULTI_DISABLE DLN2_SPI_CMD(0x39) +#define DLN2_SPI_SS_MULTI_IS_ENABLED DLN2_SPI_CMD(0x3A) +#define DLN2_SPI_GET_SUPPORTED_MODES DLN2_SPI_CMD(0x40) +#define DLN2_SPI_GET_SUPPORTED_CPHA_VALUES DLN2_SPI_CMD(0x41) +#define DLN2_SPI_GET_SUPPORTED_CPOL_VALUES DLN2_SPI_CMD(0x42) +#define DLN2_SPI_GET_SUPPORTED_FRAME_SIZES DLN2_SPI_CMD(0x43) +#define DLN2_SPI_GET_SS_COUNT DLN2_SPI_CMD(0x44) +#define DLN2_SPI_GET_MIN_FREQUENCY DLN2_SPI_CMD(0x45) +#define DLN2_SPI_GET_MAX_FREQUENCY DLN2_SPI_CMD(0x46) +#define DLN2_SPI_GET_MIN_DELAY_BETWEEN_SS DLN2_SPI_CMD(0x47) +#define DLN2_SPI_GET_MAX_DELAY_BETWEEN_SS DLN2_SPI_CMD(0x48) +#define DLN2_SPI_GET_MIN_DELAY_AFTER_SS DLN2_SPI_CMD(0x49) +#define DLN2_SPI_GET_MAX_DELAY_AFTER_SS DLN2_SPI_CMD(0x4A) +#define DLN2_SPI_GET_MIN_DELAY_BETWEEN_FRAMES DLN2_SPI_CMD(0x4B) +#define DLN2_SPI_GET_MAX_DELAY_BETWEEN_FRAMES DLN2_SPI_CMD(0x4C) + +#define DLN2_SPI_MAX_XFER_SIZE 256 +#define DLN2_SPI_BUF_SIZE (DLN2_SPI_MAX_XFER_SIZE + 16) +#define DLN2_SPI_ATTR_LEAVE_SS_LOW BIT(0) +#define DLN2_TRANSFERS_WAIT_COMPLETE 1 +#define DLN2_TRANSFERS_CANCEL 0 +#define DLN2_RPM_AUTOSUSPEND_TIMEOUT 2000 + +struct dln2_spi { + struct platform_device *pdev; + struct spi_master *master; + u8 port; + + /* + * This buffer will be used mainly for read/write operations. Since + * they're quite large, we cannot use the stack. Protection is not + * needed because all SPI communication is serialized by the SPI core. + */ + void *buf; + + u8 bpw; + u32 speed; + u16 mode; + u8 cs; +}; + +/* + * Enable/Disable SPI module. The disable command will wait for transfers to + * complete first. + */ +static int dln2_spi_enable(struct dln2_spi *dln2, bool enable) +{ + u16 cmd; + struct { + u8 port; + u8 wait_for_completion; + } tx; + unsigned len = sizeof(tx); + + tx.port = dln2->port; + + if (enable) { + cmd = DLN2_SPI_ENABLE; + len -= sizeof(tx.wait_for_completion); + } else { + tx.wait_for_completion = DLN2_TRANSFERS_WAIT_COMPLETE; + cmd = DLN2_SPI_DISABLE; + } + + return dln2_transfer_tx(dln2->pdev, cmd, &tx, len); +} + +/* + * Select/unselect multiple CS lines. The selected lines will be automatically + * toggled LOW/HIGH by the board firmware during transfers, provided they're + * enabled first. + * + * Ex: cs_mask = 0x03 -> CS0 & CS1 will be selected and the next WR/RD operation + * will toggle the lines LOW/HIGH automatically. + */ +static int dln2_spi_cs_set(struct dln2_spi *dln2, u8 cs_mask) +{ + struct { + u8 port; + u8 cs; + } tx; + + tx.port = dln2->port; + + /* + * According to Diolan docs, "a slave device can be selected by changing + * the corresponding bit value to 0". The rest must be set to 1. Hence + * the bitwise NOT in front. + */ + tx.cs = ~cs_mask; + + return dln2_transfer_tx(dln2->pdev, DLN2_SPI_SET_SS, &tx, sizeof(tx)); +} + +/* + * Select one CS line. The other lines will be un-selected. + */ +static int dln2_spi_cs_set_one(struct dln2_spi *dln2, u8 cs) +{ + return dln2_spi_cs_set(dln2, BIT(cs)); +} + +/* + * Enable/disable CS lines for usage. The module has to be disabled first. + */ +static int dln2_spi_cs_enable(struct dln2_spi *dln2, u8 cs_mask, bool enable) +{ + struct { + u8 port; + u8 cs; + } tx; + u16 cmd; + + tx.port = dln2->port; + tx.cs = cs_mask; + cmd = enable ? DLN2_SPI_SS_MULTI_ENABLE : DLN2_SPI_SS_MULTI_DISABLE; + + return dln2_transfer_tx(dln2->pdev, cmd, &tx, sizeof(tx)); +} + +static int dln2_spi_cs_enable_all(struct dln2_spi *dln2, bool enable) +{ + u8 cs_mask = GENMASK(dln2->master->num_chipselect - 1, 0); + + return dln2_spi_cs_enable(dln2, cs_mask, enable); +} + +static int dln2_spi_get_cs_num(struct dln2_spi *dln2, u16 *cs_num) +{ + int ret; + struct { + u8 port; + } tx; + struct { + __le16 cs_count; + } rx; + unsigned rx_len = sizeof(rx); + + tx.port = dln2->port; + ret = dln2_transfer(dln2->pdev, DLN2_SPI_GET_SS_COUNT, &tx, sizeof(tx), + &rx, &rx_len); + if (ret < 0) + return ret; + if (rx_len < sizeof(rx)) + return -EPROTO; + + *cs_num = le16_to_cpu(rx.cs_count); + + dev_dbg(&dln2->pdev->dev, "cs_num = %d\n", *cs_num); + + return 0; +} + +static int dln2_spi_get_speed(struct dln2_spi *dln2, u16 cmd, u32 *freq) +{ + int ret; + struct { + u8 port; + } tx; + struct { + __le32 speed; + } rx; + unsigned rx_len = sizeof(rx); + + tx.port = dln2->port; + + ret = dln2_transfer(dln2->pdev, cmd, &tx, sizeof(tx), &rx, &rx_len); + if (ret < 0) + return ret; + if (rx_len < sizeof(rx)) + return -EPROTO; + + *freq = le32_to_cpu(rx.speed); + + return 0; +} + +/* + * Get bus min/max frequencies. + */ +static int dln2_spi_get_speed_range(struct dln2_spi *dln2, u32 *fmin, u32 *fmax) +{ + int ret; + + ret = dln2_spi_get_speed(dln2, DLN2_SPI_GET_MIN_FREQUENCY, fmin); + if (ret < 0) + return ret; + + ret = dln2_spi_get_speed(dln2, DLN2_SPI_GET_MAX_FREQUENCY, fmax); + if (ret < 0) + return ret; + + dev_dbg(&dln2->pdev->dev, "freq_min = %d, freq_max = %d\n", + *fmin, *fmax); + + return 0; +} + +/* + * Set the bus speed. The module will automatically round down to the closest + * available frequency and returns it. The module has to be disabled first. + */ +static int dln2_spi_set_speed(struct dln2_spi *dln2, u32 speed) +{ + int ret; + struct { + u8 port; + __le32 speed; + } __packed tx; + struct { + __le32 speed; + } rx; + int rx_len = sizeof(rx); + + tx.port = dln2->port; + tx.speed = cpu_to_le32(speed); + + ret = dln2_transfer(dln2->pdev, DLN2_SPI_SET_FREQUENCY, &tx, sizeof(tx), + &rx, &rx_len); + if (ret < 0) + return ret; + if (rx_len < sizeof(rx)) + return -EPROTO; + + return 0; +} + +/* + * Change CPOL & CPHA. The module has to be disabled first. + */ +static int dln2_spi_set_mode(struct dln2_spi *dln2, u8 mode) +{ + struct { + u8 port; + u8 mode; + } tx; + + tx.port = dln2->port; + tx.mode = mode; + + return dln2_transfer_tx(dln2->pdev, DLN2_SPI_SET_MODE, &tx, sizeof(tx)); +} + +/* + * Change frame size. The module has to be disabled first. + */ +static int dln2_spi_set_bpw(struct dln2_spi *dln2, u8 bpw) +{ + struct { + u8 port; + u8 bpw; + } tx; + + tx.port = dln2->port; + tx.bpw = bpw; + + return dln2_transfer_tx(dln2->pdev, DLN2_SPI_SET_FRAME_SIZE, + &tx, sizeof(tx)); +} + +static int dln2_spi_get_supported_frame_sizes(struct dln2_spi *dln2, + u32 *bpw_mask) +{ + int ret; + struct { + u8 port; + } tx; + struct { + u8 count; + u8 frame_sizes[36]; + } *rx = dln2->buf; + unsigned rx_len = sizeof(*rx); + int i; + + tx.port = dln2->port; + + ret = dln2_transfer(dln2->pdev, DLN2_SPI_GET_SUPPORTED_FRAME_SIZES, + &tx, sizeof(tx), rx, &rx_len); + if (ret < 0) + return ret; + if (rx_len < sizeof(*rx)) + return -EPROTO; + if (rx->count > ARRAY_SIZE(rx->frame_sizes)) + return -EPROTO; + + *bpw_mask = 0; + for (i = 0; i < rx->count; i++) + *bpw_mask |= BIT(rx->frame_sizes[i] - 1); + + dev_dbg(&dln2->pdev->dev, "bpw_mask = 0x%X\n", *bpw_mask); + + return 0; +} + +/* + * Copy the data to DLN2 buffer and change the byte order to LE, requested by + * DLN2 module. SPI core makes sure that the data length is a multiple of word + * size. + */ +static int dln2_spi_copy_to_buf(u8 *dln2_buf, const u8 *src, u16 len, u8 bpw) +{ +#ifdef __LITTLE_ENDIAN + memcpy(dln2_buf, src, len); +#else + if (bpw <= 8) { + memcpy(dln2_buf, src, len); + } else if (bpw <= 16) { + __le16 *d = (__le16 *)dln2_buf; + u16 *s = (u16 *)src; + + len = len / 2; + while (len--) + *d++ = cpu_to_le16p(s++); + } else { + __le32 *d = (__le32 *)dln2_buf; + u32 *s = (u32 *)src; + + len = len / 4; + while (len--) + *d++ = cpu_to_le32p(s++); + } +#endif + + return 0; +} + +/* + * Copy the data from DLN2 buffer and convert to CPU byte order since the DLN2 + * buffer is LE ordered. SPI core makes sure that the data length is a multiple + * of word size. The RX dln2_buf is 2 byte aligned so, for BE, we have to make + * sure we avoid unaligned accesses for 32 bit case. + */ +static int dln2_spi_copy_from_buf(u8 *dest, const u8 *dln2_buf, u16 len, u8 bpw) +{ +#ifdef __LITTLE_ENDIAN + memcpy(dest, dln2_buf, len); +#else + if (bpw <= 8) { + memcpy(dest, dln2_buf, len); + } else if (bpw <= 16) { + u16 *d = (u16 *)dest; + __le16 *s = (__le16 *)dln2_buf; + + len = len / 2; + while (len--) + *d++ = le16_to_cpup(s++); + } else { + u32 *d = (u32 *)dest; + __le32 *s = (__le32 *)dln2_buf; + + len = len / 4; + while (len--) + *d++ = get_unaligned_le32(s++); + } +#endif + + return 0; +} + +/* + * Perform one write operation. + */ +static int dln2_spi_write_one(struct dln2_spi *dln2, const u8 *data, + u16 data_len, u8 attr) +{ + struct { + u8 port; + __le16 size; + u8 attr; + u8 buf[DLN2_SPI_MAX_XFER_SIZE]; + } __packed *tx = dln2->buf; + unsigned tx_len; + + BUILD_BUG_ON(sizeof(*tx) > DLN2_SPI_BUF_SIZE); + + if (data_len > DLN2_SPI_MAX_XFER_SIZE) + return -EINVAL; + + tx->port = dln2->port; + tx->size = cpu_to_le16(data_len); + tx->attr = attr; + + dln2_spi_copy_to_buf(tx->buf, data, data_len, dln2->bpw); + + tx_len = sizeof(*tx) + data_len - DLN2_SPI_MAX_XFER_SIZE; + return dln2_transfer_tx(dln2->pdev, DLN2_SPI_WRITE, tx, tx_len); +} + +/* + * Perform one read operation. + */ +static int dln2_spi_read_one(struct dln2_spi *dln2, u8 *data, + u16 data_len, u8 attr) +{ + int ret; + struct { + u8 port; + __le16 size; + u8 attr; + } __packed tx; + struct { + __le16 size; + u8 buf[DLN2_SPI_MAX_XFER_SIZE]; + } __packed *rx = dln2->buf; + unsigned rx_len = sizeof(*rx); + + BUILD_BUG_ON(sizeof(*rx) > DLN2_SPI_BUF_SIZE); + + if (data_len > DLN2_SPI_MAX_XFER_SIZE) + return -EINVAL; + + tx.port = dln2->port; + tx.size = cpu_to_le16(data_len); + tx.attr = attr; + + ret = dln2_transfer(dln2->pdev, DLN2_SPI_READ, &tx, sizeof(tx), + rx, &rx_len); + if (ret < 0) + return ret; + if (rx_len < sizeof(rx->size) + data_len) + return -EPROTO; + if (le16_to_cpu(rx->size) != data_len) + return -EPROTO; + + dln2_spi_copy_from_buf(data, rx->buf, data_len, dln2->bpw); + + return 0; +} + +/* + * Perform one write & read operation. + */ +static int dln2_spi_read_write_one(struct dln2_spi *dln2, const u8 *tx_data, + u8 *rx_data, u16 data_len, u8 attr) +{ + int ret; + struct { + u8 port; + __le16 size; + u8 attr; + u8 buf[DLN2_SPI_MAX_XFER_SIZE]; + } __packed *tx; + struct { + __le16 size; + u8 buf[DLN2_SPI_MAX_XFER_SIZE]; + } __packed *rx; + unsigned tx_len, rx_len; + + BUILD_BUG_ON(sizeof(*tx) > DLN2_SPI_BUF_SIZE || + sizeof(*rx) > DLN2_SPI_BUF_SIZE); + + if (data_len > DLN2_SPI_MAX_XFER_SIZE) + return -EINVAL; + + /* + * Since this is a pseudo full-duplex communication, we're perfectly + * safe to use the same buffer for both tx and rx. When DLN2 sends the + * response back, with the rx data, we don't need the tx buffer anymore. + */ + tx = dln2->buf; + rx = dln2->buf; + + tx->port = dln2->port; + tx->size = cpu_to_le16(data_len); + tx->attr = attr; + + dln2_spi_copy_to_buf(tx->buf, tx_data, data_len, dln2->bpw); + + tx_len = sizeof(*tx) + data_len - DLN2_SPI_MAX_XFER_SIZE; + rx_len = sizeof(*rx); + + ret = dln2_transfer(dln2->pdev, DLN2_SPI_READ_WRITE, tx, tx_len, + rx, &rx_len); + if (ret < 0) + return ret; + if (rx_len < sizeof(rx->size) + data_len) + return -EPROTO; + if (le16_to_cpu(rx->size) != data_len) + return -EPROTO; + + dln2_spi_copy_from_buf(rx_data, rx->buf, data_len, dln2->bpw); + + return 0; +} + +/* + * Read/Write wrapper. It will automatically split an operation into multiple + * single ones due to device buffer constraints. + */ +static int dln2_spi_rdwr(struct dln2_spi *dln2, const u8 *tx_data, + u8 *rx_data, u16 data_len, u8 attr) { + int ret; + u16 len; + u8 temp_attr; + u16 remaining = data_len; + u16 offset; + + do { + if (remaining > DLN2_SPI_MAX_XFER_SIZE) { + len = DLN2_SPI_MAX_XFER_SIZE; + temp_attr = DLN2_SPI_ATTR_LEAVE_SS_LOW; + } else { + len = remaining; + temp_attr = attr; + } + + offset = data_len - remaining; + + if (tx_data && rx_data) { + ret = dln2_spi_read_write_one(dln2, + tx_data + offset, + rx_data + offset, + len, temp_attr); + } else if (tx_data) { + ret = dln2_spi_write_one(dln2, + tx_data + offset, + len, temp_attr); + } else if (rx_data) { + ret = dln2_spi_read_one(dln2, + rx_data + offset, + len, temp_attr); + } else { + return -EINVAL; + } + + if (ret < 0) + return ret; + + remaining -= len; + } while (remaining); + + return 0; +} + +static int dln2_spi_prepare_message(struct spi_master *master, + struct spi_message *message) +{ + int ret; + struct dln2_spi *dln2 = spi_master_get_devdata(master); + struct spi_device *spi = message->spi; + + if (dln2->cs != spi->chip_select) { + ret = dln2_spi_cs_set_one(dln2, spi->chip_select); + if (ret < 0) + return ret; + + dln2->cs = spi->chip_select; + } + + return 0; +} + +static int dln2_spi_transfer_setup(struct dln2_spi *dln2, u32 speed, + u8 bpw, u8 mode) +{ + int ret; + bool bus_setup_change; + + bus_setup_change = dln2->speed != speed || dln2->mode != mode || + dln2->bpw != bpw; + + if (!bus_setup_change) + return 0; + + ret = dln2_spi_enable(dln2, false); + if (ret < 0) + return ret; + + if (dln2->speed != speed) { + ret = dln2_spi_set_speed(dln2, speed); + if (ret < 0) + return ret; + + dln2->speed = speed; + } + + if (dln2->mode != mode) { + ret = dln2_spi_set_mode(dln2, mode & 0x3); + if (ret < 0) + return ret; + + dln2->mode = mode; + } + + if (dln2->bpw != bpw) { + ret = dln2_spi_set_bpw(dln2, bpw); + if (ret < 0) + return ret; + + dln2->bpw = bpw; + } + + return dln2_spi_enable(dln2, true); +} + +static int dln2_spi_transfer_one(struct spi_master *master, + struct spi_device *spi, + struct spi_transfer *xfer) +{ + struct dln2_spi *dln2 = spi_master_get_devdata(master); + int status; + u8 attr = 0; + + status = dln2_spi_transfer_setup(dln2, xfer->speed_hz, + xfer->bits_per_word, + spi->mode); + if (status < 0) { + dev_err(&dln2->pdev->dev, "Cannot setup transfer\n"); + return status; + } + + if (!xfer->cs_change && !spi_transfer_is_last(master, xfer)) + attr = DLN2_SPI_ATTR_LEAVE_SS_LOW; + + status = dln2_spi_rdwr(dln2, xfer->tx_buf, xfer->rx_buf, + xfer->len, attr); + if (status < 0) + dev_err(&dln2->pdev->dev, "write/read failed!\n"); + + return status; +} + +static int dln2_spi_probe(struct platform_device *pdev) +{ + struct spi_master *master; + struct dln2_spi *dln2; + struct dln2_platform_data *pdata = dev_get_platdata(&pdev->dev); + int ret; + + master = spi_alloc_master(&pdev->dev, sizeof(*dln2)); + if (!master) + return -ENOMEM; + + platform_set_drvdata(pdev, master); + + dln2 = spi_master_get_devdata(master); + + dln2->buf = devm_kmalloc(&pdev->dev, DLN2_SPI_BUF_SIZE, GFP_KERNEL); + if (!dln2->buf) { + ret = -ENOMEM; + goto exit_free_master; + } + + dln2->master = master; + dln2->pdev = pdev; + dln2->port = pdata->port; + /* cs/mode can never be 0xff, so the first transfer will set them */ + dln2->cs = 0xff; + dln2->mode = 0xff; + + /* disable SPI module before continuing with the setup */ + ret = dln2_spi_enable(dln2, false); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to disable SPI module\n"); + goto exit_free_master; + } + + ret = dln2_spi_get_cs_num(dln2, &master->num_chipselect); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to get number of CS pins\n"); + goto exit_free_master; + } + + ret = dln2_spi_get_speed_range(dln2, + &master->min_speed_hz, + &master->max_speed_hz); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to read bus min/max freqs\n"); + goto exit_free_master; + } + + ret = dln2_spi_get_supported_frame_sizes(dln2, + &master->bits_per_word_mask); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to read supported frame sizes\n"); + goto exit_free_master; + } + + ret = dln2_spi_cs_enable_all(dln2, true); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to enable CS pins\n"); + goto exit_free_master; + } + + master->bus_num = -1; + master->mode_bits = SPI_CPOL | SPI_CPHA; + master->prepare_message = dln2_spi_prepare_message; + master->transfer_one = dln2_spi_transfer_one; + master->auto_runtime_pm = true; + + /* enable SPI module, we're good to go */ + ret = dln2_spi_enable(dln2, true); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to enable SPI module\n"); + goto exit_free_master; + } + + pm_runtime_set_autosuspend_delay(&pdev->dev, + DLN2_RPM_AUTOSUSPEND_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + + ret = devm_spi_register_master(&pdev->dev, master); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to register master\n"); + goto exit_register; + } + + return ret; + +exit_register: + pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + + if (dln2_spi_enable(dln2, false) < 0) + dev_err(&pdev->dev, "Failed to disable SPI module\n"); +exit_free_master: + spi_master_put(master); + + return ret; +} + +static int dln2_spi_remove(struct platform_device *pdev) +{ + struct spi_master *master = spi_master_get(platform_get_drvdata(pdev)); + struct dln2_spi *dln2 = spi_master_get_devdata(master); + + pm_runtime_disable(&pdev->dev); + + if (dln2_spi_enable(dln2, false) < 0) + dev_err(&pdev->dev, "Failed to disable SPI module\n"); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int dln2_spi_suspend(struct device *dev) +{ + int ret; + struct spi_master *master = dev_get_drvdata(dev); + struct dln2_spi *dln2 = spi_master_get_devdata(master); + + ret = spi_master_suspend(master); + if (ret < 0) + return ret; + + if (!pm_runtime_suspended(dev)) { + ret = dln2_spi_enable(dln2, false); + if (ret < 0) + return ret; + } + + /* + * USB power may be cut off during sleep. Resetting the following + * parameters will force the board to be set up before first transfer. + */ + dln2->cs = 0xff; + dln2->speed = 0; + dln2->bpw = 0; + dln2->mode = 0xff; + + return 0; +} + +static int dln2_spi_resume(struct device *dev) +{ + int ret; + struct spi_master *master = dev_get_drvdata(dev); + struct dln2_spi *dln2 = spi_master_get_devdata(master); + + if (!pm_runtime_suspended(dev)) { + ret = dln2_spi_cs_enable_all(dln2, true); + if (ret < 0) + return ret; + + ret = dln2_spi_enable(dln2, true); + if (ret < 0) + return ret; + } + + return spi_master_resume(master); +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM +static int dln2_spi_runtime_suspend(struct device *dev) +{ + struct spi_master *master = dev_get_drvdata(dev); + struct dln2_spi *dln2 = spi_master_get_devdata(master); + + return dln2_spi_enable(dln2, false); +} + +static int dln2_spi_runtime_resume(struct device *dev) +{ + struct spi_master *master = dev_get_drvdata(dev); + struct dln2_spi *dln2 = spi_master_get_devdata(master); + + return dln2_spi_enable(dln2, true); +} +#endif /* CONFIG_PM */ + +static const struct dev_pm_ops dln2_spi_pm = { + SET_SYSTEM_SLEEP_PM_OPS(dln2_spi_suspend, dln2_spi_resume) + SET_RUNTIME_PM_OPS(dln2_spi_runtime_suspend, + dln2_spi_runtime_resume, NULL) +}; + +static struct platform_driver spi_dln2_driver = { + .driver = { + .name = "dln2-spi", + .pm = &dln2_spi_pm, + }, + .probe = dln2_spi_probe, + .remove = dln2_spi_remove, +}; +module_platform_driver(spi_dln2_driver); + +MODULE_DESCRIPTION("Driver for the Diolan DLN2 SPI master interface"); +MODULE_AUTHOR("Laurentiu Palcu <laurentiu.palcu@intel.com>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:dln2-spi"); diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c index a67d37c7e3c0..a0197fd4e95c 100644 --- a/drivers/spi/spi-dw-mid.c +++ b/drivers/spi/spi-dw-mid.c @@ -247,9 +247,9 @@ static struct dw_spi_dma_ops mid_dma_ops = { /* Some specific info for SPI0 controller on Intel MID */ -/* HW info for MRST CLk Control Unit, one 32b reg */ +/* HW info for MRST Clk Control Unit, 32b reg per controller */ #define MRST_SPI_CLK_BASE 100000000 /* 100m */ -#define MRST_CLK_SPI0_REG 0xff11d86c +#define MRST_CLK_SPI_REG 0xff11d86c #define CLK_SPI_BDIV_OFFSET 0 #define CLK_SPI_BDIV_MASK 0x00000007 #define CLK_SPI_CDIV_OFFSET 9 @@ -261,16 +261,17 @@ int dw_spi_mid_init(struct dw_spi *dws) void __iomem *clk_reg; u32 clk_cdiv; - clk_reg = ioremap_nocache(MRST_CLK_SPI0_REG, 16); + clk_reg = ioremap_nocache(MRST_CLK_SPI_REG, 16); if (!clk_reg) return -ENOMEM; - /* get SPI controller operating freq info */ - clk_cdiv = (readl(clk_reg) & CLK_SPI_CDIV_MASK) >> CLK_SPI_CDIV_OFFSET; + /* Get SPI controller operating freq info */ + clk_cdiv = readl(clk_reg + dws->bus_num * sizeof(u32)); + clk_cdiv &= CLK_SPI_CDIV_MASK; + clk_cdiv >>= CLK_SPI_CDIV_OFFSET; dws->max_freq = MRST_SPI_CLK_BASE / (clk_cdiv + 1); - iounmap(clk_reg); - dws->num_cs = 16; + iounmap(clk_reg); #ifdef CONFIG_SPI_DW_MID_DMA dws->dma_priv = kzalloc(sizeof(struct mid_dma), GFP_KERNEL); diff --git a/drivers/spi/spi-dw-pci.c b/drivers/spi/spi-dw-pci.c index ba68da12cdf0..5ba331047cbe 100644 --- a/drivers/spi/spi-dw-pci.c +++ b/drivers/spi/spi-dw-pci.c @@ -30,10 +30,20 @@ struct dw_spi_pci { struct spi_pci_desc { int (*setup)(struct dw_spi *); + u16 num_cs; + u16 bus_num; }; -static struct spi_pci_desc spi_pci_mid_desc = { +static struct spi_pci_desc spi_pci_mid_desc_1 = { .setup = dw_spi_mid_init, + .num_cs = 32, + .bus_num = 0, +}; + +static struct spi_pci_desc spi_pci_mid_desc_2 = { + .setup = dw_spi_mid_init, + .num_cs = 4, + .bus_num = 1, }; static int spi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -65,18 +75,23 @@ static int spi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dws->regs = pcim_iomap_table(pdev)[pci_bar]; - dws->bus_num = 0; - dws->num_cs = 4; dws->irq = pdev->irq; /* * Specific handling for paltforms, like dma setup, * clock rate, FIFO depth. */ - if (desc && desc->setup) { - ret = desc->setup(dws); - if (ret) - return ret; + if (desc) { + dws->num_cs = desc->num_cs; + dws->bus_num = desc->bus_num; + + if (desc->setup) { + ret = desc->setup(dws); + if (ret) + return ret; + } + } else { + return -ENODEV; } ret = dw_spi_add_host(&pdev->dev, dws); @@ -121,7 +136,14 @@ static SIMPLE_DEV_PM_OPS(dw_spi_pm_ops, spi_suspend, spi_resume); static const struct pci_device_id pci_ids[] = { /* Intel MID platform SPI controller 0 */ - { PCI_VDEVICE(INTEL, 0x0800), (kernel_ulong_t)&spi_pci_mid_desc}, + /* + * The access to the device 8086:0801 is disabled by HW, since it's + * exclusively used by SCU to communicate with MSIC. + */ + /* Intel MID platform SPI controller 1 */ + { PCI_VDEVICE(INTEL, 0x0800), (kernel_ulong_t)&spi_pci_mid_desc_1}, + /* Intel MID platform SPI controller 2 */ + { PCI_VDEVICE(INTEL, 0x0812), (kernel_ulong_t)&spi_pci_mid_desc_2}, {}, }; diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c index 8edcd1b84562..5a97a62b298a 100644 --- a/drivers/spi/spi-dw.c +++ b/drivers/spi/spi-dw.c @@ -608,7 +608,7 @@ static void dw_spi_cleanup(struct spi_device *spi) } /* Restart the controller, disable all interrupts, clean rx fifo */ -static void spi_hw_init(struct dw_spi *dws) +static void spi_hw_init(struct device *dev, struct dw_spi *dws) { spi_enable_chip(dws, 0); spi_mask_intr(dws, 0xff); @@ -626,9 +626,10 @@ static void spi_hw_init(struct dw_spi *dws) if (fifo != dw_readw(dws, DW_SPI_TXFLTR)) break; } + dw_writew(dws, DW_SPI_TXFLTR, 0); dws->fifo_len = (fifo == 2) ? 0 : fifo - 1; - dw_writew(dws, DW_SPI_TXFLTR, 0); + dev_dbg(dev, "Detected FIFO size: %u bytes\n", dws->fifo_len); } } @@ -668,7 +669,7 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws) master->dev.of_node = dev->of_node; /* Basic HW init */ - spi_hw_init(dws); + spi_hw_init(dev, dws); if (dws->dma_ops && dws->dma_ops->dma_init) { ret = dws->dma_ops->dma_init(dws); @@ -731,7 +732,7 @@ int dw_spi_resume_host(struct dw_spi *dws) { int ret; - spi_hw_init(dws); + spi_hw_init(&dws->master->dev, dws); ret = spi_master_resume(dws->master); if (ret) dev_err(&dws->master->dev, "fail to start queue (%d)\n", ret); diff --git a/drivers/spi/spi-falcon.c b/drivers/spi/spi-falcon.c index 912b9037e9cf..286b2c81fc6b 100644 --- a/drivers/spi/spi-falcon.c +++ b/drivers/spi/spi-falcon.c @@ -353,16 +353,6 @@ static int falcon_sflash_setup(struct spi_device *spi) return 0; } -static int falcon_sflash_prepare_xfer(struct spi_master *master) -{ - return 0; -} - -static int falcon_sflash_unprepare_xfer(struct spi_master *master) -{ - return 0; -} - static int falcon_sflash_xfer_one(struct spi_master *master, struct spi_message *m) { @@ -420,9 +410,7 @@ static int falcon_sflash_probe(struct platform_device *pdev) master->mode_bits = SPI_MODE_3; master->flags = SPI_MASTER_HALF_DUPLEX; master->setup = falcon_sflash_setup; - master->prepare_transfer_hardware = falcon_sflash_prepare_xfer; master->transfer_one_message = falcon_sflash_xfer_one; - master->unprepare_transfer_hardware = falcon_sflash_unprepare_xfer; master->dev.of_node = pdev->dev.of_node; ret = devm_spi_register_master(&pdev->dev, master); diff --git a/drivers/spi/spi-fsl-cpm.c b/drivers/spi/spi-fsl-cpm.c index e85ab1cb17a2..9c46a3058743 100644 --- a/drivers/spi/spi-fsl-cpm.c +++ b/drivers/spi/spi-fsl-cpm.c @@ -20,6 +20,7 @@ #include <linux/dma-mapping.h> #include <linux/fsl_devices.h> #include <linux/kernel.h> +#include <linux/module.h> #include <linux/of_address.h> #include <linux/spi/spi.h> #include <linux/types.h> @@ -68,6 +69,7 @@ void fsl_spi_cpm_reinit_txrx(struct mpc8xxx_spi *mspi) } } } +EXPORT_SYMBOL_GPL(fsl_spi_cpm_reinit_txrx); static void fsl_spi_cpm_bufs_start(struct mpc8xxx_spi *mspi) { @@ -162,6 +164,7 @@ err_rx_dma: dma_unmap_single(dev, mspi->tx_dma, t->len, DMA_TO_DEVICE); return -ENOMEM; } +EXPORT_SYMBOL_GPL(fsl_spi_cpm_bufs); void fsl_spi_cpm_bufs_complete(struct mpc8xxx_spi *mspi) { @@ -174,6 +177,7 @@ void fsl_spi_cpm_bufs_complete(struct mpc8xxx_spi *mspi) dma_unmap_single(dev, mspi->rx_dma, t->len, DMA_FROM_DEVICE); mspi->xfer_in_progress = NULL; } +EXPORT_SYMBOL_GPL(fsl_spi_cpm_bufs_complete); void fsl_spi_cpm_irq(struct mpc8xxx_spi *mspi, u32 events) { @@ -198,6 +202,7 @@ void fsl_spi_cpm_irq(struct mpc8xxx_spi *mspi, u32 events) else complete(&mspi->done); } +EXPORT_SYMBOL_GPL(fsl_spi_cpm_irq); static void *fsl_spi_alloc_dummy_rx(void) { @@ -375,6 +380,7 @@ err_pram: fsl_spi_free_dummy_rx(); return -ENOMEM; } +EXPORT_SYMBOL_GPL(fsl_spi_cpm_init); void fsl_spi_cpm_free(struct mpc8xxx_spi *mspi) { @@ -389,3 +395,6 @@ void fsl_spi_cpm_free(struct mpc8xxx_spi *mspi) cpm_muram_free(cpm_muram_offset(mspi->pram)); fsl_spi_free_dummy_rx(); } +EXPORT_SYMBOL_GPL(fsl_spi_cpm_free); + +MODULE_LICENSE("GPL"); diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 9b80d54d4ddb..d1a39249704a 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -106,7 +106,7 @@ struct chip_data { }; struct fsl_dspi { - struct spi_bitbang bitbang; + struct spi_master *master; struct platform_device *pdev; struct regmap *regmap; @@ -114,6 +114,7 @@ struct fsl_dspi { struct clk *clk; struct spi_transfer *cur_transfer; + struct spi_message *cur_msg; struct chip_data *cur_chip; size_t len; void *tx; @@ -123,6 +124,7 @@ struct fsl_dspi { char dataflags; u8 cs; u16 void_write_data; + u32 cs_change; wait_queue_head_t waitq; u32 waitflags; @@ -225,6 +227,8 @@ static int dspi_transfer_write(struct fsl_dspi *dspi) if (dspi->len == 0 || tx_count == DSPI_FIFO_SIZE - 1) { /* last transfer in the transfer */ dspi_pushr |= SPI_PUSHR_EOQ; + if ((dspi->cs_change) && (!dspi->len)) + dspi_pushr &= ~SPI_PUSHR_CONT; } else if (tx_word && (dspi->len == 1)) dspi_pushr |= SPI_PUSHR_EOQ; @@ -246,6 +250,7 @@ static int dspi_transfer_read(struct fsl_dspi *dspi) int rx_count = 0; int rx_word = is_double_byte_mode(dspi); u16 d; + while ((dspi->rx < dspi->rx_end) && (rx_count < DSPI_FIFO_SIZE)) { if (rx_word) { @@ -276,69 +281,79 @@ static int dspi_transfer_read(struct fsl_dspi *dspi) return rx_count; } -static int dspi_txrx_transfer(struct spi_device *spi, struct spi_transfer *t) +static int dspi_transfer_one_message(struct spi_master *master, + struct spi_message *message) { - struct fsl_dspi *dspi = spi_master_get_devdata(spi->master); - dspi->cur_transfer = t; - dspi->cur_chip = spi_get_ctldata(spi); - dspi->cs = spi->chip_select; - dspi->void_write_data = dspi->cur_chip->void_write_data; - - dspi->dataflags = 0; - dspi->tx = (void *)t->tx_buf; - dspi->tx_end = dspi->tx + t->len; - dspi->rx = t->rx_buf; - dspi->rx_end = dspi->rx + t->len; - dspi->len = t->len; - - if (!dspi->rx) - dspi->dataflags |= TRAN_STATE_RX_VOID; - - if (!dspi->tx) - dspi->dataflags |= TRAN_STATE_TX_VOID; - - regmap_write(dspi->regmap, SPI_MCR, dspi->cur_chip->mcr_val); - regmap_write(dspi->regmap, SPI_CTAR(dspi->cs), dspi->cur_chip->ctar_val); - regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_EOQFE); - - if (t->speed_hz) + struct fsl_dspi *dspi = spi_master_get_devdata(master); + struct spi_device *spi = message->spi; + struct spi_transfer *transfer; + int status = 0; + message->actual_length = 0; + + list_for_each_entry(transfer, &message->transfers, transfer_list) { + dspi->cur_transfer = transfer; + dspi->cur_msg = message; + dspi->cur_chip = spi_get_ctldata(spi); + dspi->cs = spi->chip_select; + if (dspi->cur_transfer->transfer_list.next + == &dspi->cur_msg->transfers) + transfer->cs_change = 1; + dspi->cs_change = transfer->cs_change; + dspi->void_write_data = dspi->cur_chip->void_write_data; + + dspi->dataflags = 0; + dspi->tx = (void *)transfer->tx_buf; + dspi->tx_end = dspi->tx + transfer->len; + dspi->rx = transfer->rx_buf; + dspi->rx_end = dspi->rx + transfer->len; + dspi->len = transfer->len; + + if (!dspi->rx) + dspi->dataflags |= TRAN_STATE_RX_VOID; + + if (!dspi->tx) + dspi->dataflags |= TRAN_STATE_TX_VOID; + + regmap_write(dspi->regmap, SPI_MCR, dspi->cur_chip->mcr_val); + regmap_update_bits(dspi->regmap, SPI_MCR, + SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF, + SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF); regmap_write(dspi->regmap, SPI_CTAR(dspi->cs), dspi->cur_chip->ctar_val); + if (transfer->speed_hz) + regmap_write(dspi->regmap, SPI_CTAR(dspi->cs), + dspi->cur_chip->ctar_val); - dspi_transfer_write(dspi); - - if (wait_event_interruptible(dspi->waitq, dspi->waitflags)) - dev_err(&dspi->pdev->dev, "wait transfer complete fail!\n"); - dspi->waitflags = 0; - - return t->len - dspi->len; -} + regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_EOQFE); + message->actual_length += dspi_transfer_write(dspi); -static void dspi_chipselect(struct spi_device *spi, int value) -{ - struct fsl_dspi *dspi = spi_master_get_devdata(spi->master); - unsigned int pushr; + if (wait_event_interruptible(dspi->waitq, dspi->waitflags)) + dev_err(&dspi->pdev->dev, "wait transfer complete fail!\n"); + dspi->waitflags = 0; - regmap_read(dspi->regmap, SPI_PUSHR, &pushr); - - switch (value) { - case BITBANG_CS_ACTIVE: - pushr |= SPI_PUSHR_CONT; - break; - case BITBANG_CS_INACTIVE: - pushr &= ~SPI_PUSHR_CONT; - break; + if (transfer->delay_usecs) + udelay(transfer->delay_usecs); } - regmap_write(dspi->regmap, SPI_PUSHR, pushr); + message->status = status; + spi_finalize_current_message(master); + + return status; } -static int dspi_setup_transfer(struct spi_device *spi, struct spi_transfer *t) +static int dspi_setup(struct spi_device *spi) { struct chip_data *chip; struct fsl_dspi *dspi = spi_master_get_devdata(spi->master); unsigned char br = 0, pbr = 0, fmsz = 0; + if ((spi->bits_per_word >= 4) && (spi->bits_per_word <= 16)) { + fmsz = spi->bits_per_word - 1; + } else { + pr_err("Invalid wordsize\n"); + return -ENODEV; + } + /* Only alloc on first setup */ chip = spi_get_ctldata(spi); if (chip == NULL) { @@ -349,12 +364,6 @@ static int dspi_setup_transfer(struct spi_device *spi, struct spi_transfer *t) chip->mcr_val = SPI_MCR_MASTER | SPI_MCR_PCSIS | SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF; - if ((spi->bits_per_word >= 4) && (spi->bits_per_word <= 16)) { - fmsz = spi->bits_per_word - 1; - } else { - pr_err("Invalid wordsize\n"); - return -ENODEV; - } chip->void_write_data = 0; @@ -373,14 +382,6 @@ static int dspi_setup_transfer(struct spi_device *spi, struct spi_transfer *t) return 0; } -static int dspi_setup(struct spi_device *spi) -{ - if (!spi->max_speed_hz) - return -EINVAL; - - return dspi_setup_transfer(spi, NULL); -} - static void dspi_cleanup(struct spi_device *spi) { struct chip_data *chip = spi_get_ctldata((struct spi_device *)spi); @@ -395,22 +396,20 @@ static irqreturn_t dspi_interrupt(int irq, void *dev_id) { struct fsl_dspi *dspi = (struct fsl_dspi *)dev_id; - regmap_write(dspi->regmap, SPI_SR, SPI_SR_EOQF); + struct spi_message *msg = dspi->cur_msg; + regmap_write(dspi->regmap, SPI_SR, SPI_SR_EOQF); dspi_transfer_read(dspi); if (!dspi->len) { if (dspi->dataflags & TRAN_STATE_WORD_ODD_NUM) regmap_update_bits(dspi->regmap, SPI_CTAR(dspi->cs), - SPI_FRAME_BITS_MASK, SPI_FRAME_BITS(16)); + SPI_FRAME_BITS_MASK, SPI_FRAME_BITS(16)); dspi->waitflags = 1; wake_up_interruptible(&dspi->waitq); - } else { - dspi_transfer_write(dspi); - - return IRQ_HANDLED; - } + } else + msg->actual_length += dspi_transfer_write(dspi); return IRQ_HANDLED; } @@ -469,12 +468,12 @@ static int dspi_probe(struct platform_device *pdev) dspi = spi_master_get_devdata(master); dspi->pdev = pdev; - dspi->bitbang.master = master; - dspi->bitbang.chipselect = dspi_chipselect; - dspi->bitbang.setup_transfer = dspi_setup_transfer; - dspi->bitbang.txrx_bufs = dspi_txrx_transfer; - dspi->bitbang.master->setup = dspi_setup; - dspi->bitbang.master->dev.of_node = pdev->dev.of_node; + dspi->master = master; + + master->transfer = NULL; + master->setup = dspi_setup; + master->transfer_one_message = dspi_transfer_one_message; + master->dev.of_node = pdev->dev.of_node; master->cleanup = dspi_cleanup; master->mode_bits = SPI_CPOL | SPI_CPHA; @@ -535,7 +534,7 @@ static int dspi_probe(struct platform_device *pdev) init_waitqueue_head(&dspi->waitq); platform_set_drvdata(pdev, master); - ret = spi_bitbang_start(&dspi->bitbang); + ret = spi_register_master(master); if (ret != 0) { dev_err(&pdev->dev, "Problem registering DSPI master\n"); goto out_clk_put; @@ -557,9 +556,9 @@ static int dspi_remove(struct platform_device *pdev) struct fsl_dspi *dspi = spi_master_get_devdata(master); /* Disconnect from the SPI framework */ - spi_bitbang_stop(&dspi->bitbang); clk_disable_unprepare(dspi->clk); - spi_master_put(dspi->bitbang.master); + spi_unregister_master(dspi->master); + spi_master_put(dspi->master); return 0; } diff --git a/drivers/spi/spi-fsl-lib.c b/drivers/spi/spi-fsl-lib.c index 446b737e1532..cb35d2f0d0e6 100644 --- a/drivers/spi/spi-fsl-lib.c +++ b/drivers/spi/spi-fsl-lib.c @@ -21,6 +21,7 @@ #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/module.h> #include <linux/of_platform.h> #include <linux/spi/spi.h> #ifdef CONFIG_FSL_SOC @@ -35,7 +36,8 @@ void mpc8xxx_spi_rx_buf_##type(u32 data, struct mpc8xxx_spi *mpc8xxx_spi) \ type *rx = mpc8xxx_spi->rx; \ *rx++ = (type)(data >> mpc8xxx_spi->rx_shift); \ mpc8xxx_spi->rx = rx; \ -} +} \ +EXPORT_SYMBOL_GPL(mpc8xxx_spi_rx_buf_##type); #define MPC8XXX_SPI_TX_BUF(type) \ u32 mpc8xxx_spi_tx_buf_##type(struct mpc8xxx_spi *mpc8xxx_spi) \ @@ -47,7 +49,8 @@ u32 mpc8xxx_spi_tx_buf_##type(struct mpc8xxx_spi *mpc8xxx_spi) \ data = *tx++ << mpc8xxx_spi->tx_shift; \ mpc8xxx_spi->tx = tx; \ return data; \ -} +} \ +EXPORT_SYMBOL_GPL(mpc8xxx_spi_tx_buf_##type); MPC8XXX_SPI_RX_BUF(u8) MPC8XXX_SPI_RX_BUF(u16) @@ -60,6 +63,7 @@ struct mpc8xxx_spi_probe_info *to_of_pinfo(struct fsl_spi_platform_data *pdata) { return container_of(pdata, struct mpc8xxx_spi_probe_info, pdata); } +EXPORT_SYMBOL_GPL(to_of_pinfo); const char *mpc8xxx_spi_strmode(unsigned int flags) { @@ -75,6 +79,7 @@ const char *mpc8xxx_spi_strmode(unsigned int flags) } return "CPU"; } +EXPORT_SYMBOL_GPL(mpc8xxx_spi_strmode); void mpc8xxx_spi_probe(struct device *dev, struct resource *mem, unsigned int irq) @@ -102,13 +107,12 @@ void mpc8xxx_spi_probe(struct device *dev, struct resource *mem, mpc8xxx_spi->rx_shift = 0; mpc8xxx_spi->tx_shift = 0; - init_completion(&mpc8xxx_spi->done); - master->bus_num = pdata->bus_num; master->num_chipselect = pdata->max_chipselect; init_completion(&mpc8xxx_spi->done); } +EXPORT_SYMBOL_GPL(mpc8xxx_spi_probe); int mpc8xxx_spi_remove(struct device *dev) { @@ -127,6 +131,7 @@ int mpc8xxx_spi_remove(struct device *dev) return 0; } +EXPORT_SYMBOL_GPL(mpc8xxx_spi_remove); int of_mpc8xxx_spi_probe(struct platform_device *ofdev) { @@ -173,3 +178,6 @@ int of_mpc8xxx_spi_probe(struct platform_device *ofdev) return 0; } +EXPORT_SYMBOL_GPL(of_mpc8xxx_spi_probe); + +MODULE_LICENSE("GPL"); diff --git a/drivers/spi/spi-fsl-lib.h b/drivers/spi/spi-fsl-lib.h index b4ed04e8862f..1326a392adca 100644 --- a/drivers/spi/spi-fsl-lib.h +++ b/drivers/spi/spi-fsl-lib.h @@ -28,7 +28,7 @@ struct mpc8xxx_spi { /* rx & tx bufs from the spi_transfer */ const void *tx; void *rx; -#ifdef CONFIG_SPI_FSL_ESPI +#if IS_ENABLED(CONFIG_SPI_FSL_ESPI) int len; #endif @@ -68,7 +68,7 @@ struct mpc8xxx_spi { unsigned int flags; -#ifdef CONFIG_SPI_FSL_SPI +#if IS_ENABLED(CONFIG_SPI_FSL_SPI) int type; int native_chipselects; u8 max_bits_per_word; diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c index aee4e7589568..1c34c9314c8a 100644 --- a/drivers/spi/spi-gpio.c +++ b/drivers/spi/spi-gpio.c @@ -12,10 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/kernel.h> #include <linux/module.h> @@ -92,7 +88,7 @@ struct spi_gpio { /*----------------------------------------------------------------------*/ -static inline struct spi_gpio * __pure +static inline struct spi_gpio *__pure spi_to_spi_gpio(const struct spi_device *spi) { const struct spi_bitbang *bang; @@ -103,7 +99,7 @@ spi_to_spi_gpio(const struct spi_device *spi) return spi_gpio; } -static inline struct spi_gpio_platform_data * __pure +static inline struct spi_gpio_platform_data *__pure spi_to_pdata(const struct spi_device *spi) { return &spi_to_spi_gpio(spi)->pdata; diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c index aad6683db81b..c01567d53581 100644 --- a/drivers/spi/spi-img-spfi.c +++ b/drivers/spi/spi-img-spfi.c @@ -160,16 +160,16 @@ static unsigned int spfi_pio_write32(struct img_spfi *spfi, const u32 *buf, unsigned int count = 0; u32 status; - while (count < max) { + while (count < max / 4) { spfi_writel(spfi, SPFI_INTERRUPT_SDFUL, SPFI_INTERRUPT_CLEAR); status = spfi_readl(spfi, SPFI_INTERRUPT_STATUS); if (status & SPFI_INTERRUPT_SDFUL) break; - spfi_writel(spfi, buf[count / 4], SPFI_TX_32BIT_VALID_DATA); - count += 4; + spfi_writel(spfi, buf[count], SPFI_TX_32BIT_VALID_DATA); + count++; } - return count; + return count * 4; } static unsigned int spfi_pio_write8(struct img_spfi *spfi, const u8 *buf, @@ -196,17 +196,17 @@ static unsigned int spfi_pio_read32(struct img_spfi *spfi, u32 *buf, unsigned int count = 0; u32 status; - while (count < max) { + while (count < max / 4) { spfi_writel(spfi, SPFI_INTERRUPT_GDEX32BIT, SPFI_INTERRUPT_CLEAR); status = spfi_readl(spfi, SPFI_INTERRUPT_STATUS); if (!(status & SPFI_INTERRUPT_GDEX32BIT)) break; - buf[count / 4] = spfi_readl(spfi, SPFI_RX_32BIT_VALID_DATA); - count += 4; + buf[count] = spfi_readl(spfi, SPFI_RX_32BIT_VALID_DATA); + count++; } - return count; + return count * 4; } static unsigned int spfi_pio_read8(struct img_spfi *spfi, u8 *buf, @@ -251,17 +251,15 @@ static int img_spfi_start_pio(struct spi_master *master, time_before(jiffies, timeout)) { unsigned int tx_count, rx_count; - switch (xfer->bits_per_word) { - case 32: + if (tx_bytes >= 4) tx_count = spfi_pio_write32(spfi, tx_buf, tx_bytes); - rx_count = spfi_pio_read32(spfi, rx_buf, rx_bytes); - break; - case 8: - default: + else tx_count = spfi_pio_write8(spfi, tx_buf, tx_bytes); + + if (rx_bytes >= 4) + rx_count = spfi_pio_read32(spfi, rx_buf, rx_bytes); + else rx_count = spfi_pio_read8(spfi, rx_buf, rx_bytes); - break; - } tx_buf += tx_count; rx_buf += rx_count; @@ -331,14 +329,11 @@ static int img_spfi_start_dma(struct spi_master *master, if (xfer->rx_buf) { rxconf.direction = DMA_DEV_TO_MEM; - switch (xfer->bits_per_word) { - case 32: + if (xfer->len % 4 == 0) { rxconf.src_addr = spfi->phys + SPFI_RX_32BIT_VALID_DATA; rxconf.src_addr_width = 4; rxconf.src_maxburst = 4; - break; - case 8: - default: + } else { rxconf.src_addr = spfi->phys + SPFI_RX_8BIT_VALID_DATA; rxconf.src_addr_width = 1; rxconf.src_maxburst = 4; @@ -358,18 +353,14 @@ static int img_spfi_start_dma(struct spi_master *master, if (xfer->tx_buf) { txconf.direction = DMA_MEM_TO_DEV; - switch (xfer->bits_per_word) { - case 32: + if (xfer->len % 4 == 0) { txconf.dst_addr = spfi->phys + SPFI_TX_32BIT_VALID_DATA; txconf.dst_addr_width = 4; txconf.dst_maxburst = 4; - break; - case 8: - default: + } else { txconf.dst_addr = spfi->phys + SPFI_TX_8BIT_VALID_DATA; txconf.dst_addr_width = 1; txconf.dst_maxburst = 4; - break; } dmaengine_slave_config(spfi->tx_ch, &txconf); @@ -508,9 +499,7 @@ static void img_spfi_set_cs(struct spi_device *spi, bool enable) static bool img_spfi_can_dma(struct spi_master *master, struct spi_device *spi, struct spi_transfer *xfer) { - if (xfer->bits_per_word == 8 && xfer->len > SPFI_8BIT_FIFO_SIZE) - return true; - if (xfer->bits_per_word == 32 && xfer->len > SPFI_32BIT_FIFO_SIZE) + if (xfer->len > SPFI_32BIT_FIFO_SIZE) return true; return false; } diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index fe1b7699fab6..6fea4af51c41 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -89,7 +89,6 @@ struct spi_imx_data { struct completion xfer_done; void __iomem *base; - int irq; struct clk *clk_per; struct clk *clk_ipg; unsigned long spi_clk; @@ -896,6 +895,7 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx, { struct dma_async_tx_descriptor *desc_tx = NULL, *desc_rx = NULL; int ret; + unsigned long timeout; u32 dma; int left; struct spi_master *master = spi_imx->bitbang.master; @@ -943,17 +943,17 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx, dma_async_issue_pending(master->dma_tx); dma_async_issue_pending(master->dma_rx); /* Wait SDMA to finish the data transfer.*/ - ret = wait_for_completion_timeout(&spi_imx->dma_tx_completion, + timeout = wait_for_completion_timeout(&spi_imx->dma_tx_completion, IMX_DMA_TIMEOUT); - if (!ret) { + if (!timeout) { pr_warn("%s %s: I/O Error in DMA TX\n", dev_driver_string(&master->dev), dev_name(&master->dev)); dmaengine_terminate_all(master->dma_tx); } else { - ret = wait_for_completion_timeout(&spi_imx->dma_rx_completion, - IMX_DMA_TIMEOUT); - if (!ret) { + timeout = wait_for_completion_timeout( + &spi_imx->dma_rx_completion, IMX_DMA_TIMEOUT); + if (!timeout) { pr_warn("%s %s: I/O Error in DMA RX\n", dev_driver_string(&master->dev), dev_name(&master->dev)); @@ -968,9 +968,9 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx, spi_imx->dma_finished = 1; spi_imx->devtype_data->trigger(spi_imx); - if (!ret) + if (!timeout) ret = -ETIMEDOUT; - else if (ret > 0) + else ret = transfer->len; return ret; @@ -1080,7 +1080,7 @@ static int spi_imx_probe(struct platform_device *pdev) struct spi_master *master; struct spi_imx_data *spi_imx; struct resource *res; - int i, ret, num_cs; + int i, ret, num_cs, irq; if (!np && !mxc_platform_info) { dev_err(&pdev->dev, "can't get the platform data\n"); @@ -1147,16 +1147,16 @@ static int spi_imx_probe(struct platform_device *pdev) goto out_master_put; } - spi_imx->irq = platform_get_irq(pdev, 0); - if (spi_imx->irq < 0) { - ret = spi_imx->irq; + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; goto out_master_put; } - ret = devm_request_irq(&pdev->dev, spi_imx->irq, spi_imx_isr, 0, + ret = devm_request_irq(&pdev->dev, irq, spi_imx_isr, 0, dev_name(&pdev->dev), spi_imx); if (ret) { - dev_err(&pdev->dev, "can't get irq%d: %d\n", spi_imx->irq, ret); + dev_err(&pdev->dev, "can't get irq%d: %d\n", irq, ret); goto out_master_put; } diff --git a/drivers/spi/spi-lm70llp.c b/drivers/spi/spi-lm70llp.c index 41c5765be746..ba72347cb99d 100644 --- a/drivers/spi/spi-lm70llp.c +++ b/drivers/spi/spi-lm70llp.c @@ -12,10 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/init.h> diff --git a/drivers/spi/spi-meson-spifc.c b/drivers/spi/spi-meson-spifc.c index 1bbac0378bf7..5468fc70dbf8 100644 --- a/drivers/spi/spi-meson-spifc.c +++ b/drivers/spi/spi-meson-spifc.c @@ -85,7 +85,7 @@ struct meson_spifc { struct device *dev; }; -static struct regmap_config spifc_regmap_config = { +static const struct regmap_config spifc_regmap_config = { .reg_bits = 32, .val_bits = 32, .reg_stride = 4, diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c index 4045a1e580e1..5b0e9a3e83f6 100644 --- a/drivers/spi/spi-mxs.c +++ b/drivers/spi/spi-mxs.c @@ -282,9 +282,8 @@ static int mxs_spi_txrx_dma(struct mxs_spi *spi, dmaengine_submit(desc); dma_async_issue_pending(ssp->dmach); - ret = wait_for_completion_timeout(&spi->c, - msecs_to_jiffies(SSP_TIMEOUT)); - if (!ret) { + if (!wait_for_completion_timeout(&spi->c, + msecs_to_jiffies(SSP_TIMEOUT))) { dev_err(ssp->dev, "DMA transfer timeout\n"); ret = -ETIMEDOUT; dmaengine_terminate_all(ssp->dmach); diff --git a/drivers/spi/spi-omap-100k.c b/drivers/spi/spi-omap-100k.c index 79399ae9c84c..d890d309dff9 100644 --- a/drivers/spi/spi-omap-100k.c +++ b/drivers/spi/spi-omap-100k.c @@ -16,11 +16,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * */ #include <linux/kernel.h> #include <linux/init.h> diff --git a/drivers/spi/spi-omap-uwire.c b/drivers/spi/spi-omap-uwire.c index daf1ada5cd11..3c0844457c07 100644 --- a/drivers/spi/spi-omap-uwire.c +++ b/drivers/spi/spi-omap-uwire.c @@ -28,10 +28,6 @@ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/kernel.h> #include <linux/init.h> diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 3bc3cbabbbc0..4df8942058de 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -14,11 +14,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * */ #include <linux/kernel.h> diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c index 3dec9e0b99b8..861664776672 100644 --- a/drivers/spi/spi-orion.c +++ b/drivers/spi/spi-orion.c @@ -28,7 +28,12 @@ /* Runtime PM autosuspend timeout: PM is fairly light on this driver */ #define SPI_AUTOSUSPEND_TIMEOUT 200 -#define ORION_NUM_CHIPSELECTS 1 /* only one slave is supported*/ +/* Some SoCs using this driver support up to 8 chip selects. + * It is up to the implementer to only use the chip selects + * that are available. + */ +#define ORION_NUM_CHIPSELECTS 8 + #define ORION_SPI_WAIT_RDY_MAX_LOOP 2000 /* in usec */ #define ORION_SPI_IF_CTRL_REG 0x00 @@ -44,6 +49,10 @@ #define ARMADA_SPI_CLK_PRESCALE_MASK 0xDF #define ORION_SPI_MODE_MASK (ORION_SPI_MODE_CPOL | \ ORION_SPI_MODE_CPHA) +#define ORION_SPI_CS_MASK 0x1C +#define ORION_SPI_CS_SHIFT 2 +#define ORION_SPI_CS(cs) ((cs << ORION_SPI_CS_SHIFT) & \ + ORION_SPI_CS_MASK) enum orion_spi_type { ORION_SPI, @@ -215,9 +224,18 @@ orion_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t) return 0; } -static void orion_spi_set_cs(struct orion_spi *orion_spi, int enable) +static void orion_spi_set_cs(struct spi_device *spi, bool enable) { - if (enable) + struct orion_spi *orion_spi; + + orion_spi = spi_master_get_devdata(spi->master); + + orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, ORION_SPI_CS_MASK); + orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG, + ORION_SPI_CS(spi->chip_select)); + + /* Chip select logic is inverted from spi_set_cs */ + if (!enable) orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); else orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); @@ -332,64 +350,31 @@ out: return xfer->len - count; } -static int orion_spi_transfer_one_message(struct spi_master *master, - struct spi_message *m) +static int orion_spi_transfer_one(struct spi_master *master, + struct spi_device *spi, + struct spi_transfer *t) { - struct orion_spi *orion_spi = spi_master_get_devdata(master); - struct spi_device *spi = m->spi; - struct spi_transfer *t = NULL; - int par_override = 0; int status = 0; - int cs_active = 0; - - /* Load defaults */ - status = orion_spi_setup_transfer(spi, NULL); + status = orion_spi_setup_transfer(spi, t); if (status < 0) - goto msg_done; - - list_for_each_entry(t, &m->transfers, transfer_list) { - if (par_override || t->speed_hz || t->bits_per_word) { - par_override = 1; - status = orion_spi_setup_transfer(spi, t); - if (status < 0) - break; - if (!t->speed_hz && !t->bits_per_word) - par_override = 0; - } - - if (!cs_active) { - orion_spi_set_cs(orion_spi, 1); - cs_active = 1; - } + return status; - if (t->len) - m->actual_length += orion_spi_write_read(spi, t); + if (t->len) + orion_spi_write_read(spi, t); - if (t->delay_usecs) - udelay(t->delay_usecs); - - if (t->cs_change) { - orion_spi_set_cs(orion_spi, 0); - cs_active = 0; - } - } - -msg_done: - if (cs_active) - orion_spi_set_cs(orion_spi, 0); - - m->status = status; - spi_finalize_current_message(master); + return status; +} - return 0; +static int orion_spi_setup(struct spi_device *spi) +{ + return orion_spi_setup_transfer(spi, NULL); } static int orion_spi_reset(struct orion_spi *orion_spi) { /* Verify that the CS is deasserted */ - orion_spi_set_cs(orion_spi, 0); - + orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); return 0; } @@ -442,9 +427,10 @@ static int orion_spi_probe(struct platform_device *pdev) /* we support only mode 0, and no options */ master->mode_bits = SPI_CPHA | SPI_CPOL; - - master->transfer_one_message = orion_spi_transfer_one_message; + master->set_cs = orion_spi_set_cs; + master->transfer_one = orion_spi_transfer_one; master->num_chipselect = ORION_NUM_CHIPSELECTS; + master->setup = orion_spi_setup; master->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16); master->auto_runtime_pm = true; diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c index 62a9297e96ac..66a173939be8 100644 --- a/drivers/spi/spi-pxa2xx-dma.c +++ b/drivers/spi/spi-pxa2xx-dma.c @@ -111,23 +111,24 @@ static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data, * by using ->dma_running. */ if (atomic_dec_and_test(&drv_data->dma_running)) { - void __iomem *reg = drv_data->ioaddr; - /* * If the other CPU is still handling the ROR interrupt we * might not know about the error yet. So we re-check the * ROR bit here before we clear the status register. */ if (!error) { - u32 status = read_SSSR(reg) & drv_data->mask_sr; + u32 status = pxa2xx_spi_read(drv_data, SSSR) + & drv_data->mask_sr; error = status & SSSR_ROR; } /* Clear status & disable interrupts */ - write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg); + pxa2xx_spi_write(drv_data, SSCR1, + pxa2xx_spi_read(drv_data, SSCR1) + & ~drv_data->dma_cr1); write_SSSR_CS(drv_data, drv_data->clear_sr); if (!pxa25x_ssp_comp(drv_data)) - write_SSTO(0, reg); + pxa2xx_spi_write(drv_data, SSTO, 0); if (!error) { pxa2xx_spi_unmap_dma_buffers(drv_data); @@ -139,7 +140,9 @@ static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data, msg->state = pxa2xx_spi_next_transfer(drv_data); } else { /* In case we got an error we disable the SSP now */ - write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg); + pxa2xx_spi_write(drv_data, SSCR0, + pxa2xx_spi_read(drv_data, SSCR0) + & ~SSCR0_SSE); msg->state = ERROR_STATE; } @@ -247,7 +250,7 @@ irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data) { u32 status; - status = read_SSSR(drv_data->ioaddr) & drv_data->mask_sr; + status = pxa2xx_spi_read(drv_data, SSSR) & drv_data->mask_sr; if (status & SSSR_ROR) { dev_err(&drv_data->pdev->dev, "FIFO overrun\n"); diff --git a/drivers/spi/spi-pxa2xx-pxadma.c b/drivers/spi/spi-pxa2xx-pxadma.c index e8a26f25d5c0..2e0796a0003f 100644 --- a/drivers/spi/spi-pxa2xx-pxadma.c +++ b/drivers/spi/spi-pxa2xx-pxadma.c @@ -12,10 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/delay.h> @@ -25,6 +21,7 @@ #include <linux/spi/spi.h> #include <linux/spi/pxa2xx_spi.h> +#include <mach/dma.h> #include "spi-pxa2xx.h" #define DMA_INT_MASK (DCSR_ENDINTR | DCSR_STARTINTR | DCSR_BUSERR) @@ -118,11 +115,11 @@ static void pxa2xx_spi_unmap_dma_buffers(struct driver_data *drv_data) drv_data->dma_mapped = 0; } -static int wait_ssp_rx_stall(void const __iomem *ioaddr) +static int wait_ssp_rx_stall(struct driver_data *drv_data) { unsigned long limit = loops_per_jiffy << 1; - while ((read_SSSR(ioaddr) & SSSR_BSY) && --limit) + while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_BSY) && --limit) cpu_relax(); return limit; @@ -141,17 +138,18 @@ static int wait_dma_channel_stop(int channel) static void pxa2xx_spi_dma_error_stop(struct driver_data *drv_data, const char *msg) { - void __iomem *reg = drv_data->ioaddr; - /* Stop and reset */ DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL; DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL; write_SSSR_CS(drv_data, drv_data->clear_sr); - write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg); + pxa2xx_spi_write(drv_data, SSCR1, + pxa2xx_spi_read(drv_data, SSCR1) + & ~drv_data->dma_cr1); if (!pxa25x_ssp_comp(drv_data)) - write_SSTO(0, reg); + pxa2xx_spi_write(drv_data, SSTO, 0); pxa2xx_spi_flush(drv_data); - write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg); + pxa2xx_spi_write(drv_data, SSCR0, + pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE); pxa2xx_spi_unmap_dma_buffers(drv_data); @@ -163,11 +161,12 @@ static void pxa2xx_spi_dma_error_stop(struct driver_data *drv_data, static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data) { - void __iomem *reg = drv_data->ioaddr; struct spi_message *msg = drv_data->cur_msg; /* Clear and disable interrupts on SSP and DMA channels*/ - write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg); + pxa2xx_spi_write(drv_data, SSCR1, + pxa2xx_spi_read(drv_data, SSCR1) + & ~drv_data->dma_cr1); write_SSSR_CS(drv_data, drv_data->clear_sr); DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL; DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL; @@ -228,7 +227,7 @@ void pxa2xx_spi_dma_handler(int channel, void *data) && (drv_data->ssp_type == PXA25x_SSP)) { /* Wait for rx to stall */ - if (wait_ssp_rx_stall(drv_data->ioaddr) == 0) + if (wait_ssp_rx_stall(drv_data) == 0) dev_err(&drv_data->pdev->dev, "dma_handler: ssp rx stall failed\n"); @@ -240,9 +239,8 @@ void pxa2xx_spi_dma_handler(int channel, void *data) irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data) { u32 irq_status; - void __iomem *reg = drv_data->ioaddr; - irq_status = read_SSSR(reg) & drv_data->mask_sr; + irq_status = pxa2xx_spi_read(drv_data, SSSR) & drv_data->mask_sr; if (irq_status & SSSR_ROR) { pxa2xx_spi_dma_error_stop(drv_data, "dma_transfer: fifo overrun"); @@ -252,7 +250,7 @@ irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data) /* Check for false positive timeout */ if ((irq_status & SSSR_TINT) && (DCSR(drv_data->tx_channel) & DCSR_RUN)) { - write_SSSR(SSSR_TINT, reg); + pxa2xx_spi_write(drv_data, SSSR, SSSR_TINT); return IRQ_HANDLED; } @@ -261,7 +259,7 @@ irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data) /* Clear and disable timeout interrupt, do the rest in * dma_transfer_complete */ if (!pxa25x_ssp_comp(drv_data)) - write_SSTO(0, reg); + pxa2xx_spi_write(drv_data, SSTO, 0); /* finish this transfer, start the next */ pxa2xx_spi_dma_transfer_complete(drv_data); diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 23822e7df6c1..6f72ad01e041 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -11,10 +11,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/init.h> @@ -45,8 +41,6 @@ MODULE_DESCRIPTION("PXA2xx SSP SPI Controller"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:pxa2xx-spi"); -#define MAX_BUSES 3 - #define TIMOUT_DFLT 1000 /* @@ -162,7 +156,6 @@ pxa2xx_spi_get_rx_default_thre(const struct driver_data *drv_data) static bool pxa2xx_spi_txfifo_full(const struct driver_data *drv_data) { - void __iomem *reg = drv_data->ioaddr; u32 mask; switch (drv_data->ssp_type) { @@ -174,7 +167,7 @@ static bool pxa2xx_spi_txfifo_full(const struct driver_data *drv_data) break; } - return (read_SSSR(reg) & mask) == mask; + return (pxa2xx_spi_read(drv_data, SSSR) & mask) == mask; } static void pxa2xx_spi_clear_rx_thre(const struct driver_data *drv_data, @@ -253,9 +246,6 @@ static void lpss_ssp_setup(struct driver_data *drv_data) unsigned offset = 0x400; u32 value, orig; - if (!is_lpss_ssp(drv_data)) - return; - /* * Perform auto-detection of the LPSS SSP private registers. They * can be either at 1k or 2k offset from the base address. @@ -304,9 +294,6 @@ static void lpss_ssp_cs_control(struct driver_data *drv_data, bool enable) { u32 value; - if (!is_lpss_ssp(drv_data)) - return; - value = __lpss_ssp_read_priv(drv_data, SPI_CS_CONTROL); if (enable) value &= ~SPI_CS_CONTROL_CS_HIGH; @@ -320,7 +307,7 @@ static void cs_assert(struct driver_data *drv_data) struct chip_data *chip = drv_data->cur_chip; if (drv_data->ssp_type == CE4100_SSP) { - write_SSSR(drv_data->cur_chip->frm, drv_data->ioaddr); + pxa2xx_spi_write(drv_data, SSSR, drv_data->cur_chip->frm); return; } @@ -334,7 +321,8 @@ static void cs_assert(struct driver_data *drv_data) return; } - lpss_ssp_cs_control(drv_data, true); + if (is_lpss_ssp(drv_data)) + lpss_ssp_cs_control(drv_data, true); } static void cs_deassert(struct driver_data *drv_data) @@ -354,20 +342,18 @@ static void cs_deassert(struct driver_data *drv_data) return; } - lpss_ssp_cs_control(drv_data, false); + if (is_lpss_ssp(drv_data)) + lpss_ssp_cs_control(drv_data, false); } int pxa2xx_spi_flush(struct driver_data *drv_data) { unsigned long limit = loops_per_jiffy << 1; - void __iomem *reg = drv_data->ioaddr; - do { - while (read_SSSR(reg) & SSSR_RNE) { - read_SSDR(reg); - } - } while ((read_SSSR(reg) & SSSR_BSY) && --limit); + while (pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE) + pxa2xx_spi_read(drv_data, SSDR); + } while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_BSY) && --limit); write_SSSR_CS(drv_data, SSSR_ROR); return limit; @@ -375,14 +361,13 @@ int pxa2xx_spi_flush(struct driver_data *drv_data) static int null_writer(struct driver_data *drv_data) { - void __iomem *reg = drv_data->ioaddr; u8 n_bytes = drv_data->n_bytes; if (pxa2xx_spi_txfifo_full(drv_data) || (drv_data->tx == drv_data->tx_end)) return 0; - write_SSDR(0, reg); + pxa2xx_spi_write(drv_data, SSDR, 0); drv_data->tx += n_bytes; return 1; @@ -390,12 +375,11 @@ static int null_writer(struct driver_data *drv_data) static int null_reader(struct driver_data *drv_data) { - void __iomem *reg = drv_data->ioaddr; u8 n_bytes = drv_data->n_bytes; - while ((read_SSSR(reg) & SSSR_RNE) - && (drv_data->rx < drv_data->rx_end)) { - read_SSDR(reg); + while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE) + && (drv_data->rx < drv_data->rx_end)) { + pxa2xx_spi_read(drv_data, SSDR); drv_data->rx += n_bytes; } @@ -404,13 +388,11 @@ static int null_reader(struct driver_data *drv_data) static int u8_writer(struct driver_data *drv_data) { - void __iomem *reg = drv_data->ioaddr; - if (pxa2xx_spi_txfifo_full(drv_data) || (drv_data->tx == drv_data->tx_end)) return 0; - write_SSDR(*(u8 *)(drv_data->tx), reg); + pxa2xx_spi_write(drv_data, SSDR, *(u8 *)(drv_data->tx)); ++drv_data->tx; return 1; @@ -418,11 +400,9 @@ static int u8_writer(struct driver_data *drv_data) static int u8_reader(struct driver_data *drv_data) { - void __iomem *reg = drv_data->ioaddr; - - while ((read_SSSR(reg) & SSSR_RNE) - && (drv_data->rx < drv_data->rx_end)) { - *(u8 *)(drv_data->rx) = read_SSDR(reg); + while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE) + && (drv_data->rx < drv_data->rx_end)) { + *(u8 *)(drv_data->rx) = pxa2xx_spi_read(drv_data, SSDR); ++drv_data->rx; } @@ -431,13 +411,11 @@ static int u8_reader(struct driver_data *drv_data) static int u16_writer(struct driver_data *drv_data) { - void __iomem *reg = drv_data->ioaddr; - if (pxa2xx_spi_txfifo_full(drv_data) || (drv_data->tx == drv_data->tx_end)) return 0; - write_SSDR(*(u16 *)(drv_data->tx), reg); + pxa2xx_spi_write(drv_data, SSDR, *(u16 *)(drv_data->tx)); drv_data->tx += 2; return 1; @@ -445,11 +423,9 @@ static int u16_writer(struct driver_data *drv_data) static int u16_reader(struct driver_data *drv_data) { - void __iomem *reg = drv_data->ioaddr; - - while ((read_SSSR(reg) & SSSR_RNE) - && (drv_data->rx < drv_data->rx_end)) { - *(u16 *)(drv_data->rx) = read_SSDR(reg); + while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE) + && (drv_data->rx < drv_data->rx_end)) { + *(u16 *)(drv_data->rx) = pxa2xx_spi_read(drv_data, SSDR); drv_data->rx += 2; } @@ -458,13 +434,11 @@ static int u16_reader(struct driver_data *drv_data) static int u32_writer(struct driver_data *drv_data) { - void __iomem *reg = drv_data->ioaddr; - if (pxa2xx_spi_txfifo_full(drv_data) || (drv_data->tx == drv_data->tx_end)) return 0; - write_SSDR(*(u32 *)(drv_data->tx), reg); + pxa2xx_spi_write(drv_data, SSDR, *(u32 *)(drv_data->tx)); drv_data->tx += 4; return 1; @@ -472,11 +446,9 @@ static int u32_writer(struct driver_data *drv_data) static int u32_reader(struct driver_data *drv_data) { - void __iomem *reg = drv_data->ioaddr; - - while ((read_SSSR(reg) & SSSR_RNE) - && (drv_data->rx < drv_data->rx_end)) { - *(u32 *)(drv_data->rx) = read_SSDR(reg); + while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE) + && (drv_data->rx < drv_data->rx_end)) { + *(u32 *)(drv_data->rx) = pxa2xx_spi_read(drv_data, SSDR); drv_data->rx += 4; } @@ -552,27 +524,25 @@ static void giveback(struct driver_data *drv_data) static void reset_sccr1(struct driver_data *drv_data) { - void __iomem *reg = drv_data->ioaddr; struct chip_data *chip = drv_data->cur_chip; u32 sccr1_reg; - sccr1_reg = read_SSCR1(reg) & ~drv_data->int_cr1; + sccr1_reg = pxa2xx_spi_read(drv_data, SSCR1) & ~drv_data->int_cr1; sccr1_reg &= ~SSCR1_RFT; sccr1_reg |= chip->threshold; - write_SSCR1(sccr1_reg, reg); + pxa2xx_spi_write(drv_data, SSCR1, sccr1_reg); } static void int_error_stop(struct driver_data *drv_data, const char* msg) { - void __iomem *reg = drv_data->ioaddr; - /* Stop and reset SSP */ write_SSSR_CS(drv_data, drv_data->clear_sr); reset_sccr1(drv_data); if (!pxa25x_ssp_comp(drv_data)) - write_SSTO(0, reg); + pxa2xx_spi_write(drv_data, SSTO, 0); pxa2xx_spi_flush(drv_data); - write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg); + pxa2xx_spi_write(drv_data, SSCR0, + pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE); dev_err(&drv_data->pdev->dev, "%s\n", msg); @@ -582,13 +552,11 @@ static void int_error_stop(struct driver_data *drv_data, const char* msg) static void int_transfer_complete(struct driver_data *drv_data) { - void __iomem *reg = drv_data->ioaddr; - /* Stop SSP */ write_SSSR_CS(drv_data, drv_data->clear_sr); reset_sccr1(drv_data); if (!pxa25x_ssp_comp(drv_data)) - write_SSTO(0, reg); + pxa2xx_spi_write(drv_data, SSTO, 0); /* Update total byte transferred return count actual bytes read */ drv_data->cur_msg->actual_length += drv_data->len - @@ -607,12 +575,10 @@ static void int_transfer_complete(struct driver_data *drv_data) static irqreturn_t interrupt_transfer(struct driver_data *drv_data) { - void __iomem *reg = drv_data->ioaddr; + u32 irq_mask = (pxa2xx_spi_read(drv_data, SSCR1) & SSCR1_TIE) ? + drv_data->mask_sr : drv_data->mask_sr & ~SSSR_TFS; - u32 irq_mask = (read_SSCR1(reg) & SSCR1_TIE) ? - drv_data->mask_sr : drv_data->mask_sr & ~SSSR_TFS; - - u32 irq_status = read_SSSR(reg) & irq_mask; + u32 irq_status = pxa2xx_spi_read(drv_data, SSSR) & irq_mask; if (irq_status & SSSR_ROR) { int_error_stop(drv_data, "interrupt_transfer: fifo overrun"); @@ -620,7 +586,7 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data) } if (irq_status & SSSR_TINT) { - write_SSSR(SSSR_TINT, reg); + pxa2xx_spi_write(drv_data, SSSR, SSSR_TINT); if (drv_data->read(drv_data)) { int_transfer_complete(drv_data); return IRQ_HANDLED; @@ -644,7 +610,7 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data) u32 bytes_left; u32 sccr1_reg; - sccr1_reg = read_SSCR1(reg); + sccr1_reg = pxa2xx_spi_read(drv_data, SSCR1); sccr1_reg &= ~SSCR1_TIE; /* @@ -670,7 +636,7 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data) pxa2xx_spi_set_rx_thre(drv_data, &sccr1_reg, rx_thre); } - write_SSCR1(sccr1_reg, reg); + pxa2xx_spi_write(drv_data, SSCR1, sccr1_reg); } /* We did something */ @@ -680,7 +646,6 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data) static irqreturn_t ssp_int(int irq, void *dev_id) { struct driver_data *drv_data = dev_id; - void __iomem *reg = drv_data->ioaddr; u32 sccr1_reg; u32 mask = drv_data->mask_sr; u32 status; @@ -700,11 +665,11 @@ static irqreturn_t ssp_int(int irq, void *dev_id) * are all set to one. That means that the device is already * powered off. */ - status = read_SSSR(reg); + status = pxa2xx_spi_read(drv_data, SSSR); if (status == ~0) return IRQ_NONE; - sccr1_reg = read_SSCR1(reg); + sccr1_reg = pxa2xx_spi_read(drv_data, SSCR1); /* Ignore possible writes if we don't need to write */ if (!(sccr1_reg & SSCR1_TIE)) @@ -715,10 +680,14 @@ static irqreturn_t ssp_int(int irq, void *dev_id) if (!drv_data->cur_msg) { - write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg); - write_SSCR1(read_SSCR1(reg) & ~drv_data->int_cr1, reg); + pxa2xx_spi_write(drv_data, SSCR0, + pxa2xx_spi_read(drv_data, SSCR0) + & ~SSCR0_SSE); + pxa2xx_spi_write(drv_data, SSCR1, + pxa2xx_spi_read(drv_data, SSCR1) + & ~drv_data->int_cr1); if (!pxa25x_ssp_comp(drv_data)) - write_SSTO(0, reg); + pxa2xx_spi_write(drv_data, SSTO, 0); write_SSSR_CS(drv_data, drv_data->clear_sr); dev_err(&drv_data->pdev->dev, @@ -787,7 +756,6 @@ static void pump_transfers(unsigned long data) struct spi_transfer *transfer = NULL; struct spi_transfer *previous = NULL; struct chip_data *chip = NULL; - void __iomem *reg = drv_data->ioaddr; u32 clk_div = 0; u8 bits = 0; u32 speed = 0; @@ -931,7 +899,7 @@ static void pump_transfers(unsigned long data) /* Clear status and start DMA engine */ cr1 = chip->cr1 | dma_thresh | drv_data->dma_cr1; - write_SSSR(drv_data->clear_sr, reg); + pxa2xx_spi_write(drv_data, SSSR, drv_data->clear_sr); pxa2xx_spi_dma_start(drv_data); } else { @@ -944,39 +912,43 @@ static void pump_transfers(unsigned long data) } if (is_lpss_ssp(drv_data)) { - if ((read_SSIRF(reg) & 0xff) != chip->lpss_rx_threshold) - write_SSIRF(chip->lpss_rx_threshold, reg); - if ((read_SSITF(reg) & 0xffff) != chip->lpss_tx_threshold) - write_SSITF(chip->lpss_tx_threshold, reg); + if ((pxa2xx_spi_read(drv_data, SSIRF) & 0xff) + != chip->lpss_rx_threshold) + pxa2xx_spi_write(drv_data, SSIRF, + chip->lpss_rx_threshold); + if ((pxa2xx_spi_read(drv_data, SSITF) & 0xffff) + != chip->lpss_tx_threshold) + pxa2xx_spi_write(drv_data, SSITF, + chip->lpss_tx_threshold); } if (is_quark_x1000_ssp(drv_data) && - (read_DDS_RATE(reg) != chip->dds_rate)) - write_DDS_RATE(chip->dds_rate, reg); + (pxa2xx_spi_read(drv_data, DDS_RATE) != chip->dds_rate)) + pxa2xx_spi_write(drv_data, DDS_RATE, chip->dds_rate); /* see if we need to reload the config registers */ - if ((read_SSCR0(reg) != cr0) || - (read_SSCR1(reg) & change_mask) != (cr1 & change_mask)) { - + if ((pxa2xx_spi_read(drv_data, SSCR0) != cr0) + || (pxa2xx_spi_read(drv_data, SSCR1) & change_mask) + != (cr1 & change_mask)) { /* stop the SSP, and update the other bits */ - write_SSCR0(cr0 & ~SSCR0_SSE, reg); + pxa2xx_spi_write(drv_data, SSCR0, cr0 & ~SSCR0_SSE); if (!pxa25x_ssp_comp(drv_data)) - write_SSTO(chip->timeout, reg); + pxa2xx_spi_write(drv_data, SSTO, chip->timeout); /* first set CR1 without interrupt and service enables */ - write_SSCR1(cr1 & change_mask, reg); + pxa2xx_spi_write(drv_data, SSCR1, cr1 & change_mask); /* restart the SSP */ - write_SSCR0(cr0, reg); + pxa2xx_spi_write(drv_data, SSCR0, cr0); } else { if (!pxa25x_ssp_comp(drv_data)) - write_SSTO(chip->timeout, reg); + pxa2xx_spi_write(drv_data, SSTO, chip->timeout); } cs_assert(drv_data); /* after chip select, release the data by enabling service * requests and interrupts, without changing any mode bits */ - write_SSCR1(cr1, reg); + pxa2xx_spi_write(drv_data, SSCR1, cr1); } static int pxa2xx_spi_transfer_one_message(struct spi_master *master, @@ -1005,8 +977,8 @@ static int pxa2xx_spi_unprepare_transfer(struct spi_master *master) struct driver_data *drv_data = spi_master_get_devdata(master); /* Disable the SSP now */ - write_SSCR0(read_SSCR0(drv_data->ioaddr) & ~SSCR0_SSE, - drv_data->ioaddr); + pxa2xx_spi_write(drv_data, SSCR0, + pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE); return 0; } @@ -1289,6 +1261,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) struct driver_data *drv_data; struct ssp_device *ssp; int status; + u32 tmp; platform_info = dev_get_platdata(dev); if (!platform_info) { @@ -1386,38 +1359,35 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) drv_data->max_clk_rate = clk_get_rate(ssp->clk); /* Load default SSP configuration */ - write_SSCR0(0, drv_data->ioaddr); + pxa2xx_spi_write(drv_data, SSCR0, 0); switch (drv_data->ssp_type) { case QUARK_X1000_SSP: - write_SSCR1(QUARK_X1000_SSCR1_RxTresh( - RX_THRESH_QUARK_X1000_DFLT) | - QUARK_X1000_SSCR1_TxTresh( - TX_THRESH_QUARK_X1000_DFLT), - drv_data->ioaddr); + tmp = QUARK_X1000_SSCR1_RxTresh(RX_THRESH_QUARK_X1000_DFLT) + | QUARK_X1000_SSCR1_TxTresh(TX_THRESH_QUARK_X1000_DFLT); + pxa2xx_spi_write(drv_data, SSCR1, tmp); /* using the Motorola SPI protocol and use 8 bit frame */ - write_SSCR0(QUARK_X1000_SSCR0_Motorola - | QUARK_X1000_SSCR0_DataSize(8), - drv_data->ioaddr); + pxa2xx_spi_write(drv_data, SSCR0, + QUARK_X1000_SSCR0_Motorola + | QUARK_X1000_SSCR0_DataSize(8)); break; default: - write_SSCR1(SSCR1_RxTresh(RX_THRESH_DFLT) | - SSCR1_TxTresh(TX_THRESH_DFLT), - drv_data->ioaddr); - write_SSCR0(SSCR0_SCR(2) - | SSCR0_Motorola - | SSCR0_DataSize(8), - drv_data->ioaddr); + tmp = SSCR1_RxTresh(RX_THRESH_DFLT) | + SSCR1_TxTresh(TX_THRESH_DFLT); + pxa2xx_spi_write(drv_data, SSCR1, tmp); + tmp = SSCR0_SCR(2) | SSCR0_Motorola | SSCR0_DataSize(8); + pxa2xx_spi_write(drv_data, SSCR0, tmp); break; } if (!pxa25x_ssp_comp(drv_data)) - write_SSTO(0, drv_data->ioaddr); + pxa2xx_spi_write(drv_data, SSTO, 0); if (!is_quark_x1000_ssp(drv_data)) - write_SSPSP(0, drv_data->ioaddr); + pxa2xx_spi_write(drv_data, SSPSP, 0); - lpss_ssp_setup(drv_data); + if (is_lpss_ssp(drv_data)) + lpss_ssp_setup(drv_data); tasklet_init(&drv_data->pump_transfers, pump_transfers, (unsigned long)drv_data); @@ -1460,7 +1430,7 @@ static int pxa2xx_spi_remove(struct platform_device *pdev) pm_runtime_get_sync(&pdev->dev); /* Disable the SSP at the peripheral and SOC level */ - write_SSCR0(0, drv_data->ioaddr); + pxa2xx_spi_write(drv_data, SSCR0, 0); clk_disable_unprepare(ssp->clk); /* Release DMA */ @@ -1497,7 +1467,7 @@ static int pxa2xx_spi_suspend(struct device *dev) status = spi_master_suspend(drv_data->master); if (status != 0) return status; - write_SSCR0(0, drv_data->ioaddr); + pxa2xx_spi_write(drv_data, SSCR0, 0); if (!pm_runtime_suspended(dev)) clk_disable_unprepare(ssp->clk); @@ -1518,7 +1488,8 @@ static int pxa2xx_spi_resume(struct device *dev) clk_prepare_enable(ssp->clk); /* Restore LPSS private register bits */ - lpss_ssp_setup(drv_data); + if (is_lpss_ssp(drv_data)) + lpss_ssp_setup(drv_data); /* Start the queue running */ status = spi_master_resume(drv_data->master); diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h index 6bec59c90cd4..85a58c906869 100644 --- a/drivers/spi/spi-pxa2xx.h +++ b/drivers/spi/spi-pxa2xx.h @@ -115,23 +115,17 @@ struct chip_data { void (*cs_control)(u32 command); }; -#define DEFINE_SSP_REG(reg, off) \ -static inline u32 read_##reg(void const __iomem *p) \ -{ return __raw_readl(p + (off)); } \ -\ -static inline void write_##reg(u32 v, void __iomem *p) \ -{ __raw_writel(v, p + (off)); } - -DEFINE_SSP_REG(SSCR0, 0x00) -DEFINE_SSP_REG(SSCR1, 0x04) -DEFINE_SSP_REG(SSSR, 0x08) -DEFINE_SSP_REG(SSITR, 0x0c) -DEFINE_SSP_REG(SSDR, 0x10) -DEFINE_SSP_REG(DDS_RATE, 0x28) /* DDS Clock Rate */ -DEFINE_SSP_REG(SSTO, 0x28) -DEFINE_SSP_REG(SSPSP, 0x2c) -DEFINE_SSP_REG(SSITF, SSITF) -DEFINE_SSP_REG(SSIRF, SSIRF) +static inline u32 pxa2xx_spi_read(const struct driver_data *drv_data, + unsigned reg) +{ + return __raw_readl(drv_data->ioaddr + reg); +} + +static inline void pxa2xx_spi_write(const struct driver_data *drv_data, + unsigned reg, u32 val) +{ + __raw_writel(val, drv_data->ioaddr + reg); +} #define START_STATE ((void *)0) #define RUNNING_STATE ((void *)1) @@ -155,13 +149,11 @@ static inline int pxa25x_ssp_comp(struct driver_data *drv_data) static inline void write_SSSR_CS(struct driver_data *drv_data, u32 val) { - void __iomem *reg = drv_data->ioaddr; - if (drv_data->ssp_type == CE4100_SSP || drv_data->ssp_type == QUARK_X1000_SSP) - val |= read_SSSR(reg) & SSSR_ALT_FRM_MASK; + val |= pxa2xx_spi_read(drv_data, SSSR) & SSSR_ALT_FRM_MASK; - write_SSSR(val, reg); + pxa2xx_spi_write(drv_data, SSSR, val); } extern int pxa2xx_spi_flush(struct driver_data *drv_data); diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c index e7fb5a0d2e8d..ff9cdbdb6672 100644 --- a/drivers/spi/spi-qup.c +++ b/drivers/spi/spi-qup.c @@ -337,7 +337,7 @@ static irqreturn_t spi_qup_qup_irq(int irq, void *dev_id) static int spi_qup_io_config(struct spi_device *spi, struct spi_transfer *xfer) { struct spi_qup *controller = spi_master_get_devdata(spi->master); - u32 config, iomode, mode; + u32 config, iomode, mode, control; int ret, n_words, w_size; if (spi->mode & SPI_LOOP && xfer->len > controller->in_fifo_sz) { @@ -392,6 +392,15 @@ static int spi_qup_io_config(struct spi_device *spi, struct spi_transfer *xfer) writel_relaxed(iomode, controller->base + QUP_IO_M_MODES); + control = readl_relaxed(controller->base + SPI_IO_CONTROL); + + if (spi->mode & SPI_CPOL) + control |= SPI_IO_C_CLK_IDLE_HIGH; + else + control &= ~SPI_IO_C_CLK_IDLE_HIGH; + + writel_relaxed(control, controller->base + SPI_IO_CONTROL); + config = readl_relaxed(controller->base + SPI_CONFIG); if (spi->mode & SPI_LOOP) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index daabbabd26b0..1a777dc261d6 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -437,6 +437,7 @@ static void rockchip_spi_prepare_dma(struct rockchip_spi *rs) rs->state &= ~TXBUSY; spin_unlock_irqrestore(&rs->lock, flags); + rxdesc = NULL; if (rs->rx) { rxconf.direction = rs->dma_rx.direction; rxconf.src_addr = rs->dma_rx.addr; @@ -453,6 +454,7 @@ static void rockchip_spi_prepare_dma(struct rockchip_spi *rs) rxdesc->callback_param = rs; } + txdesc = NULL; if (rs->tx) { txconf.direction = rs->dma_tx.direction; txconf.dst_addr = rs->dma_tx.addr; @@ -470,7 +472,7 @@ static void rockchip_spi_prepare_dma(struct rockchip_spi *rs) } /* rx must be started before tx due to spi instinct */ - if (rs->rx) { + if (rxdesc) { spin_lock_irqsave(&rs->lock, flags); rs->state |= RXBUSY; spin_unlock_irqrestore(&rs->lock, flags); @@ -478,7 +480,7 @@ static void rockchip_spi_prepare_dma(struct rockchip_spi *rs) dma_async_issue_pending(rs->dma_rx.ch); } - if (rs->tx) { + if (txdesc) { spin_lock_irqsave(&rs->lock, flags); rs->state |= TXBUSY; spin_unlock_irqrestore(&rs->lock, flags); diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index 2071f788c6fb..46ce47076e63 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -15,11 +15,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * */ #include <linux/module.h> diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index 37b19836f5cb..9231c34b5a5c 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -11,10 +11,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/init.h> diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c index 237f2e7a7179..5a56acf8a43e 100644 --- a/drivers/spi/spi-sc18is602.c +++ b/drivers/spi/spi-sc18is602.c @@ -12,10 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/kernel.h> diff --git a/drivers/spi/spi-sh-hspi.c b/drivers/spi/spi-sh-hspi.c index fc29233d0650..20e800e70442 100644 --- a/drivers/spi/spi-sh-hspi.c +++ b/drivers/spi/spi-sh-hspi.c @@ -16,11 +16,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * */ #include <linux/clk.h> diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 3ab7a21445fc..e57eec0b2f46 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -82,6 +82,8 @@ struct sh_msiof_spi_priv { #define MDR1_SYNCMD_LR 0x30000000 /* L/R mode */ #define MDR1_SYNCAC_SHIFT 25 /* Sync Polarity (1 = Active-low) */ #define MDR1_BITLSB_SHIFT 24 /* MSB/LSB First (1 = LSB first) */ +#define MDR1_DTDL_SHIFT 20 /* Data Pin Bit Delay for MSIOF_SYNC */ +#define MDR1_SYNCDL_SHIFT 16 /* Frame Sync Signal Timing Delay */ #define MDR1_FLD_MASK 0x0000000c /* Frame Sync Signal Interval (0-3) */ #define MDR1_FLD_SHIFT 2 #define MDR1_XXSTP 0x00000001 /* Transmission/Reception Stop on FIFO */ @@ -241,42 +243,80 @@ static irqreturn_t sh_msiof_spi_irq(int irq, void *data) static struct { unsigned short div; - unsigned short scr; -} const sh_msiof_spi_clk_table[] = { - { 1, SCR_BRPS( 1) | SCR_BRDV_DIV_1 }, - { 2, SCR_BRPS( 1) | SCR_BRDV_DIV_2 }, - { 4, SCR_BRPS( 1) | SCR_BRDV_DIV_4 }, - { 8, SCR_BRPS( 1) | SCR_BRDV_DIV_8 }, - { 16, SCR_BRPS( 1) | SCR_BRDV_DIV_16 }, - { 32, SCR_BRPS( 1) | SCR_BRDV_DIV_32 }, - { 64, SCR_BRPS(32) | SCR_BRDV_DIV_2 }, - { 128, SCR_BRPS(32) | SCR_BRDV_DIV_4 }, - { 256, SCR_BRPS(32) | SCR_BRDV_DIV_8 }, - { 512, SCR_BRPS(32) | SCR_BRDV_DIV_16 }, - { 1024, SCR_BRPS(32) | SCR_BRDV_DIV_32 }, + unsigned short brdv; +} const sh_msiof_spi_div_table[] = { + { 1, SCR_BRDV_DIV_1 }, + { 2, SCR_BRDV_DIV_2 }, + { 4, SCR_BRDV_DIV_4 }, + { 8, SCR_BRDV_DIV_8 }, + { 16, SCR_BRDV_DIV_16 }, + { 32, SCR_BRDV_DIV_32 }, }; static void sh_msiof_spi_set_clk_regs(struct sh_msiof_spi_priv *p, unsigned long parent_rate, u32 spi_hz) { unsigned long div = 1024; + u32 brps, scr; size_t k; if (!WARN_ON(!spi_hz || !parent_rate)) div = DIV_ROUND_UP(parent_rate, spi_hz); - /* TODO: make more fine grained */ - - for (k = 0; k < ARRAY_SIZE(sh_msiof_spi_clk_table); k++) { - if (sh_msiof_spi_clk_table[k].div >= div) + for (k = 0; k < ARRAY_SIZE(sh_msiof_spi_div_table); k++) { + brps = DIV_ROUND_UP(div, sh_msiof_spi_div_table[k].div); + if (brps <= 32) /* max of brdv is 32 */ break; } - k = min_t(int, k, ARRAY_SIZE(sh_msiof_spi_clk_table) - 1); + k = min_t(int, k, ARRAY_SIZE(sh_msiof_spi_div_table) - 1); - sh_msiof_write(p, TSCR, sh_msiof_spi_clk_table[k].scr); + scr = sh_msiof_spi_div_table[k].brdv | SCR_BRPS(brps); + sh_msiof_write(p, TSCR, scr); if (!(p->chipdata->master_flags & SPI_MASTER_MUST_TX)) - sh_msiof_write(p, RSCR, sh_msiof_spi_clk_table[k].scr); + sh_msiof_write(p, RSCR, scr); +} + +static u32 sh_msiof_get_delay_bit(u32 dtdl_or_syncdl) +{ + /* + * DTDL/SYNCDL bit : p->info->dtdl or p->info->syncdl + * b'000 : 0 + * b'001 : 100 + * b'010 : 200 + * b'011 (SYNCDL only) : 300 + * b'101 : 50 + * b'110 : 150 + */ + if (dtdl_or_syncdl % 100) + return dtdl_or_syncdl / 100 + 5; + else + return dtdl_or_syncdl / 100; +} + +static u32 sh_msiof_spi_get_dtdl_and_syncdl(struct sh_msiof_spi_priv *p) +{ + u32 val; + + if (!p->info) + return 0; + + /* check if DTDL and SYNCDL is allowed value */ + if (p->info->dtdl > 200 || p->info->syncdl > 300) { + dev_warn(&p->pdev->dev, "DTDL or SYNCDL is too large\n"); + return 0; + } + + /* check if the sum of DTDL and SYNCDL becomes an integer value */ + if ((p->info->dtdl + p->info->syncdl) % 100) { + dev_warn(&p->pdev->dev, "the sum of DTDL/SYNCDL is not good\n"); + return 0; + } + + val = sh_msiof_get_delay_bit(p->info->dtdl) << MDR1_DTDL_SHIFT; + val |= sh_msiof_get_delay_bit(p->info->syncdl) << MDR1_SYNCDL_SHIFT; + + return val; } static void sh_msiof_spi_set_pin_regs(struct sh_msiof_spi_priv *p, @@ -296,6 +336,7 @@ static void sh_msiof_spi_set_pin_regs(struct sh_msiof_spi_priv *p, tmp = MDR1_SYNCMD_SPI | 1 << MDR1_FLD_SHIFT | MDR1_XXSTP; tmp |= !cs_high << MDR1_SYNCAC_SHIFT; tmp |= lsb_first << MDR1_BITLSB_SHIFT; + tmp |= sh_msiof_spi_get_dtdl_and_syncdl(p); sh_msiof_write(p, TMDR1, tmp | MDR1_TRMD | TMDR1_PCON); if (p->chipdata->master_flags & SPI_MASTER_MUST_TX) { /* These bits are reserved if RX needs TX */ @@ -501,7 +542,7 @@ static int sh_msiof_spi_setup(struct spi_device *spi) gpio_set_value(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH)); - pm_runtime_put_sync(&p->pdev->dev); + pm_runtime_put(&p->pdev->dev); return 0; } @@ -595,8 +636,7 @@ static int sh_msiof_spi_txrx_once(struct sh_msiof_spi_priv *p, } /* wait for tx fifo to be emptied / rx fifo to be filled */ - ret = wait_for_completion_timeout(&p->done, HZ); - if (!ret) { + if (!wait_for_completion_timeout(&p->done, HZ)) { dev_err(&p->pdev->dev, "PIO timeout\n"); ret = -ETIMEDOUT; goto stop_reset; @@ -706,8 +746,7 @@ static int sh_msiof_dma_once(struct sh_msiof_spi_priv *p, const void *tx, } /* wait for tx fifo to be emptied / rx fifo to be filled */ - ret = wait_for_completion_timeout(&p->done, HZ); - if (!ret) { + if (!wait_for_completion_timeout(&p->done, HZ)) { dev_err(&p->pdev->dev, "DMA timeout\n"); ret = -ETIMEDOUT; goto stop_reset; @@ -957,6 +996,8 @@ static struct sh_msiof_spi_info *sh_msiof_spi_parse_dt(struct device *dev) &info->tx_fifo_override); of_property_read_u32(np, "renesas,rx-fifo-size", &info->rx_fifo_override); + of_property_read_u32(np, "renesas,dtdl", &info->dtdl); + of_property_read_u32(np, "renesas,syncdl", &info->syncdl); info->num_chipselect = num_cs; diff --git a/drivers/spi/spi-sh.c b/drivers/spi/spi-sh.c index 1cfc906dd174..502501187c9e 100644 --- a/drivers/spi/spi-sh.c +++ b/drivers/spi/spi-sh.c @@ -14,11 +14,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * */ #include <linux/module.h> diff --git a/drivers/spi/spi-sirf.c b/drivers/spi/spi-sirf.c index d075191476f0..f5715c9f68b0 100644 --- a/drivers/spi/spi-sirf.c +++ b/drivers/spi/spi-sirf.c @@ -818,7 +818,6 @@ static SIMPLE_DEV_PM_OPS(spi_sirfsoc_pm_ops, spi_sirfsoc_suspend, static const struct of_device_id spi_sirfsoc_of_match[] = { { .compatible = "sirf,prima2-spi", }, - { .compatible = "sirf,marco-spi", }, {} }; MODULE_DEVICE_TABLE(of, spi_sirfsoc_of_match); diff --git a/drivers/spi/spi-st-ssc4.c b/drivers/spi/spi-st-ssc4.c new file mode 100644 index 000000000000..2faeaa7b57a8 --- /dev/null +++ b/drivers/spi/spi-st-ssc4.c @@ -0,0 +1,504 @@ +/* + * Copyright (c) 2008-2014 STMicroelectronics Limited + * + * Author: Angus Clark <Angus.Clark@st.com> + * Patrice Chotard <patrice.chotard@st.com> + * Lee Jones <lee.jones@linaro.org> + * + * SPI master mode controller driver, used in STMicroelectronics devices. + * + * May be copied or modified under the terms of the GNU General Public + * License Version 2.0 only. See linux/COPYING for more information. + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/pinctrl/consumer.h> +#include <linux/platform_device.h> +#include <linux/of.h> +#include <linux/of_gpio.h> +#include <linux/of_irq.h> +#include <linux/pm_runtime.h> +#include <linux/spi/spi.h> +#include <linux/spi/spi_bitbang.h> + +/* SSC registers */ +#define SSC_BRG 0x000 +#define SSC_TBUF 0x004 +#define SSC_RBUF 0x008 +#define SSC_CTL 0x00C +#define SSC_IEN 0x010 +#define SSC_I2C 0x018 + +/* SSC Control */ +#define SSC_CTL_DATA_WIDTH_9 0x8 +#define SSC_CTL_DATA_WIDTH_MSK 0xf +#define SSC_CTL_BM 0xf +#define SSC_CTL_HB BIT(4) +#define SSC_CTL_PH BIT(5) +#define SSC_CTL_PO BIT(6) +#define SSC_CTL_SR BIT(7) +#define SSC_CTL_MS BIT(8) +#define SSC_CTL_EN BIT(9) +#define SSC_CTL_LPB BIT(10) +#define SSC_CTL_EN_TX_FIFO BIT(11) +#define SSC_CTL_EN_RX_FIFO BIT(12) +#define SSC_CTL_EN_CLST_RX BIT(13) + +/* SSC Interrupt Enable */ +#define SSC_IEN_TEEN BIT(2) + +#define FIFO_SIZE 8 + +struct spi_st { + /* SSC SPI Controller */ + void __iomem *base; + struct clk *clk; + struct device *dev; + + /* SSC SPI current transaction */ + const u8 *tx_ptr; + u8 *rx_ptr; + u16 bytes_per_word; + unsigned int words_remaining; + unsigned int baud; + struct completion done; +}; + +static int spi_st_clk_enable(struct spi_st *spi_st) +{ + /* + * Current platforms use one of the core clocks for SPI and I2C. + * If we attempt to disable the clock, the system will hang. + * + * TODO: Remove this when platform supports power domains. + */ + return 0; + + return clk_prepare_enable(spi_st->clk); +} + +static void spi_st_clk_disable(struct spi_st *spi_st) +{ + /* + * Current platforms use one of the core clocks for SPI and I2C. + * If we attempt to disable the clock, the system will hang. + * + * TODO: Remove this when platform supports power domains. + */ + return; + + clk_disable_unprepare(spi_st->clk); +} + +/* Load the TX FIFO */ +static void ssc_write_tx_fifo(struct spi_st *spi_st) +{ + unsigned int count, i; + uint32_t word = 0; + + if (spi_st->words_remaining > FIFO_SIZE) + count = FIFO_SIZE; + else + count = spi_st->words_remaining; + + for (i = 0; i < count; i++) { + if (spi_st->tx_ptr) { + if (spi_st->bytes_per_word == 1) { + word = *spi_st->tx_ptr++; + } else { + word = *spi_st->tx_ptr++; + word = *spi_st->tx_ptr++ | (word << 8); + } + } + writel_relaxed(word, spi_st->base + SSC_TBUF); + } +} + +/* Read the RX FIFO */ +static void ssc_read_rx_fifo(struct spi_st *spi_st) +{ + unsigned int count, i; + uint32_t word = 0; + + if (spi_st->words_remaining > FIFO_SIZE) + count = FIFO_SIZE; + else + count = spi_st->words_remaining; + + for (i = 0; i < count; i++) { + word = readl_relaxed(spi_st->base + SSC_RBUF); + + if (spi_st->rx_ptr) { + if (spi_st->bytes_per_word == 1) { + *spi_st->rx_ptr++ = (uint8_t)word; + } else { + *spi_st->rx_ptr++ = (word >> 8); + *spi_st->rx_ptr++ = word & 0xff; + } + } + } + spi_st->words_remaining -= count; +} + +static int spi_st_transfer_one(struct spi_master *master, + struct spi_device *spi, struct spi_transfer *t) +{ + struct spi_st *spi_st = spi_master_get_devdata(master); + uint32_t ctl = 0; + + /* Setup transfer */ + spi_st->tx_ptr = t->tx_buf; + spi_st->rx_ptr = t->rx_buf; + + if (spi->bits_per_word > 8) { + /* + * Anything greater than 8 bits-per-word requires 2 + * bytes-per-word in the RX/TX buffers + */ + spi_st->bytes_per_word = 2; + spi_st->words_remaining = t->len / 2; + + } else if (spi->bits_per_word == 8 && !(t->len & 0x1)) { + /* + * If transfer is even-length, and 8 bits-per-word, then + * implement as half-length 16 bits-per-word transfer + */ + spi_st->bytes_per_word = 2; + spi_st->words_remaining = t->len / 2; + + /* Set SSC_CTL to 16 bits-per-word */ + ctl = readl_relaxed(spi_st->base + SSC_CTL); + writel_relaxed((ctl | 0xf), spi_st->base + SSC_CTL); + + readl_relaxed(spi_st->base + SSC_RBUF); + + } else { + spi_st->bytes_per_word = 1; + spi_st->words_remaining = t->len; + } + + reinit_completion(&spi_st->done); + + /* Start transfer by writing to the TX FIFO */ + ssc_write_tx_fifo(spi_st); + writel_relaxed(SSC_IEN_TEEN, spi_st->base + SSC_IEN); + + /* Wait for transfer to complete */ + wait_for_completion(&spi_st->done); + + /* Restore SSC_CTL if necessary */ + if (ctl) + writel_relaxed(ctl, spi_st->base + SSC_CTL); + + spi_finalize_current_transfer(spi->master); + + return t->len; +} + +static void spi_st_cleanup(struct spi_device *spi) +{ + int cs = spi->cs_gpio; + + if (gpio_is_valid(cs)) + devm_gpio_free(&spi->dev, cs); +} + +/* the spi->mode bits understood by this driver: */ +#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST | SPI_LOOP | SPI_CS_HIGH) +static int spi_st_setup(struct spi_device *spi) +{ + struct spi_st *spi_st = spi_master_get_devdata(spi->master); + u32 spi_st_clk, sscbrg, var; + u32 hz = spi->max_speed_hz; + int cs = spi->cs_gpio; + int ret; + + if (!hz) { + dev_err(&spi->dev, "max_speed_hz unspecified\n"); + return -EINVAL; + } + + if (!gpio_is_valid(cs)) { + dev_err(&spi->dev, "%d is not a valid gpio\n", cs); + return -EINVAL; + } + + if (devm_gpio_request(&spi->dev, cs, dev_name(&spi->dev))) { + dev_err(&spi->dev, "could not request gpio:%d\n", cs); + return -EINVAL; + } + + ret = gpio_direction_output(cs, spi->mode & SPI_CS_HIGH); + if (ret) + return ret; + + spi_st_clk = clk_get_rate(spi_st->clk); + + /* Set SSC_BRF */ + sscbrg = spi_st_clk / (2 * hz); + if (sscbrg < 0x07 || sscbrg > BIT(16)) { + dev_err(&spi->dev, + "baudrate %d outside valid range %d\n", sscbrg, hz); + return -EINVAL; + } + + spi_st->baud = spi_st_clk / (2 * sscbrg); + if (sscbrg == BIT(16)) /* 16-bit counter wraps */ + sscbrg = 0x0; + + writel_relaxed(sscbrg, spi_st->base + SSC_BRG); + + dev_dbg(&spi->dev, + "setting baudrate:target= %u hz, actual= %u hz, sscbrg= %u\n", + hz, spi_st->baud, sscbrg); + + /* Set SSC_CTL and enable SSC */ + var = readl_relaxed(spi_st->base + SSC_CTL); + var |= SSC_CTL_MS; + + if (spi->mode & SPI_CPOL) + var |= SSC_CTL_PO; + else + var &= ~SSC_CTL_PO; + + if (spi->mode & SPI_CPHA) + var |= SSC_CTL_PH; + else + var &= ~SSC_CTL_PH; + + if ((spi->mode & SPI_LSB_FIRST) == 0) + var |= SSC_CTL_HB; + else + var &= ~SSC_CTL_HB; + + if (spi->mode & SPI_LOOP) + var |= SSC_CTL_LPB; + else + var &= ~SSC_CTL_LPB; + + var &= ~SSC_CTL_DATA_WIDTH_MSK; + var |= (spi->bits_per_word - 1); + + var |= SSC_CTL_EN_TX_FIFO | SSC_CTL_EN_RX_FIFO; + var |= SSC_CTL_EN; + + writel_relaxed(var, spi_st->base + SSC_CTL); + + /* Clear the status register */ + readl_relaxed(spi_st->base + SSC_RBUF); + + return 0; +} + +/* Interrupt fired when TX shift register becomes empty */ +static irqreturn_t spi_st_irq(int irq, void *dev_id) +{ + struct spi_st *spi_st = (struct spi_st *)dev_id; + + /* Read RX FIFO */ + ssc_read_rx_fifo(spi_st); + + /* Fill TX FIFO */ + if (spi_st->words_remaining) { + ssc_write_tx_fifo(spi_st); + } else { + /* TX/RX complete */ + writel_relaxed(0x0, spi_st->base + SSC_IEN); + /* + * read SSC_IEN to ensure that this bit is set + * before re-enabling interrupt + */ + readl(spi_st->base + SSC_IEN); + complete(&spi_st->done); + } + + return IRQ_HANDLED; +} + +static int spi_st_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct spi_master *master; + struct resource *res; + struct spi_st *spi_st; + int irq, ret = 0; + u32 var; + + master = spi_alloc_master(&pdev->dev, sizeof(*spi_st)); + if (!master) + return -ENOMEM; + + master->dev.of_node = np; + master->mode_bits = MODEBITS; + master->setup = spi_st_setup; + master->cleanup = spi_st_cleanup; + master->transfer_one = spi_st_transfer_one; + master->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16); + master->auto_runtime_pm = true; + master->bus_num = pdev->id; + spi_st = spi_master_get_devdata(master); + + spi_st->clk = devm_clk_get(&pdev->dev, "ssc"); + if (IS_ERR(spi_st->clk)) { + dev_err(&pdev->dev, "Unable to request clock\n"); + return PTR_ERR(spi_st->clk); + } + + ret = spi_st_clk_enable(spi_st); + if (ret) + return ret; + + init_completion(&spi_st->done); + + /* Get resources */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + spi_st->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(spi_st->base)) { + ret = PTR_ERR(spi_st->base); + goto clk_disable; + } + + /* Disable I2C and Reset SSC */ + writel_relaxed(0x0, spi_st->base + SSC_I2C); + var = readw_relaxed(spi_st->base + SSC_CTL); + var |= SSC_CTL_SR; + writel_relaxed(var, spi_st->base + SSC_CTL); + + udelay(1); + var = readl_relaxed(spi_st->base + SSC_CTL); + var &= ~SSC_CTL_SR; + writel_relaxed(var, spi_st->base + SSC_CTL); + + /* Set SSC into slave mode before reconfiguring PIO pins */ + var = readl_relaxed(spi_st->base + SSC_CTL); + var &= ~SSC_CTL_MS; + writel_relaxed(var, spi_st->base + SSC_CTL); + + irq = irq_of_parse_and_map(np, 0); + if (!irq) { + dev_err(&pdev->dev, "IRQ missing or invalid\n"); + ret = -EINVAL; + goto clk_disable; + } + + ret = devm_request_irq(&pdev->dev, irq, spi_st_irq, 0, + pdev->name, spi_st); + if (ret) { + dev_err(&pdev->dev, "Failed to request irq %d\n", irq); + goto clk_disable; + } + + /* by default the device is on */ + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + + platform_set_drvdata(pdev, master); + + ret = devm_spi_register_master(&pdev->dev, master); + if (ret) { + dev_err(&pdev->dev, "Failed to register master\n"); + goto clk_disable; + } + + return 0; + +clk_disable: + spi_st_clk_disable(spi_st); + + return ret; +} + +static int spi_st_remove(struct platform_device *pdev) +{ + struct spi_master *master = platform_get_drvdata(pdev); + struct spi_st *spi_st = spi_master_get_devdata(master); + + spi_st_clk_disable(spi_st); + + pinctrl_pm_select_sleep_state(&pdev->dev); + + return 0; +} + +#ifdef CONFIG_PM +static int spi_st_runtime_suspend(struct device *dev) +{ + struct spi_master *master = dev_get_drvdata(dev); + struct spi_st *spi_st = spi_master_get_devdata(master); + + writel_relaxed(0, spi_st->base + SSC_IEN); + pinctrl_pm_select_sleep_state(dev); + + spi_st_clk_disable(spi_st); + + return 0; +} + +static int spi_st_runtime_resume(struct device *dev) +{ + struct spi_master *master = dev_get_drvdata(dev); + struct spi_st *spi_st = spi_master_get_devdata(master); + int ret; + + ret = spi_st_clk_enable(spi_st); + pinctrl_pm_select_default_state(dev); + + return ret; +} +#endif + +#ifdef CONFIG_PM_SLEEP +static int spi_st_suspend(struct device *dev) +{ + struct spi_master *master = dev_get_drvdata(dev); + int ret; + + ret = spi_master_suspend(master); + if (ret) + return ret; + + return pm_runtime_force_suspend(dev); +} + +static int spi_st_resume(struct device *dev) +{ + struct spi_master *master = dev_get_drvdata(dev); + int ret; + + ret = spi_master_resume(master); + if (ret) + return ret; + + return pm_runtime_force_resume(dev); +} +#endif + +static const struct dev_pm_ops spi_st_pm = { + SET_SYSTEM_SLEEP_PM_OPS(spi_st_suspend, spi_st_resume) + SET_RUNTIME_PM_OPS(spi_st_runtime_suspend, spi_st_runtime_resume, NULL) +}; + +static struct of_device_id stm_spi_match[] = { + { .compatible = "st,comms-ssc4-spi", }, + {}, +}; +MODULE_DEVICE_TABLE(of, stm_spi_match); + +static struct platform_driver spi_st_driver = { + .driver = { + .name = "spi-st", + .pm = &spi_st_pm, + .of_match_table = of_match_ptr(stm_spi_match), + }, + .probe = spi_st_probe, + .remove = spi_st_remove, +}; +module_platform_driver(spi_st_driver); + +MODULE_AUTHOR("Patrice Chotard <patrice.chotard@st.com>"); +MODULE_DESCRIPTION("STM SSC SPI driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c index 6146c4cd6583..884a716e50cb 100644 --- a/drivers/spi/spi-ti-qspi.c +++ b/drivers/spi/spi-ti-qspi.c @@ -201,7 +201,7 @@ static void ti_qspi_restore_ctx(struct ti_qspi *qspi) static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t) { - int wlen, count, ret; + int wlen, count; unsigned int cmd; const u8 *txbuf; @@ -230,9 +230,8 @@ static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t) } ti_qspi_write(qspi, cmd, QSPI_SPI_CMD_REG); - ret = wait_for_completion_timeout(&qspi->transfer_complete, - QSPI_COMPLETION_TIMEOUT); - if (ret == 0) { + if (!wait_for_completion_timeout(&qspi->transfer_complete, + QSPI_COMPLETION_TIMEOUT)) { dev_err(qspi->dev, "write timed out\n"); return -ETIMEDOUT; } @@ -245,7 +244,7 @@ static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t) static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t) { - int wlen, count, ret; + int wlen, count; unsigned int cmd; u8 *rxbuf; @@ -268,9 +267,8 @@ static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t) while (count) { dev_dbg(qspi->dev, "rx cmd %08x dc %08x\n", cmd, qspi->dc); ti_qspi_write(qspi, cmd, QSPI_SPI_CMD_REG); - ret = wait_for_completion_timeout(&qspi->transfer_complete, - QSPI_COMPLETION_TIMEOUT); - if (ret == 0) { + if (!wait_for_completion_timeout(&qspi->transfer_complete, + QSPI_COMPLETION_TIMEOUT)) { dev_err(qspi->dev, "read timed out\n"); return -ETIMEDOUT; } diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index be692ad50442..93dfcee0f987 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -11,10 +11,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. */ #include <linux/delay.h> diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c index 79bd84f43430..133f53a9c1d4 100644 --- a/drivers/spi/spi-xilinx.c +++ b/drivers/spi/spi-xilinx.c @@ -22,6 +22,8 @@ #include <linux/spi/xilinx_spi.h> #include <linux/io.h> +#define XILINX_SPI_MAX_CS 32 + #define XILINX_SPI_NAME "xilinx_spi" /* Register definitions as per "OPB Serial Peripheral Interface (SPI) (v1.00e) @@ -34,7 +36,8 @@ #define XSPI_CR_MASTER_MODE 0x04 #define XSPI_CR_CPOL 0x08 #define XSPI_CR_CPHA 0x10 -#define XSPI_CR_MODE_MASK (XSPI_CR_CPHA | XSPI_CR_CPOL) +#define XSPI_CR_MODE_MASK (XSPI_CR_CPHA | XSPI_CR_CPOL | \ + XSPI_CR_LSB_FIRST | XSPI_CR_LOOP) #define XSPI_CR_TXFIFO_RESET 0x20 #define XSPI_CR_RXFIFO_RESET 0x40 #define XSPI_CR_MANUAL_SSELECT 0x80 @@ -85,12 +88,11 @@ struct xilinx_spi { u8 *rx_ptr; /* pointer in the Tx buffer */ const u8 *tx_ptr; /* pointer in the Rx buffer */ - int remaining_bytes; /* the number of bytes left to transfer */ - u8 bits_per_word; + u8 bytes_per_word; + int buffer_size; /* buffer size in words */ + u32 cs_inactive; /* Level of the CS pins when inactive*/ unsigned int (*read_fn)(void __iomem *); void (*write_fn)(u32, void __iomem *); - void (*tx_fn)(struct xilinx_spi *); - void (*rx_fn)(struct xilinx_spi *); }; static void xspi_write32(u32 val, void __iomem *addr) @@ -113,49 +115,51 @@ static unsigned int xspi_read32_be(void __iomem *addr) return ioread32be(addr); } -static void xspi_tx8(struct xilinx_spi *xspi) +static void xilinx_spi_tx(struct xilinx_spi *xspi) { - xspi->write_fn(*xspi->tx_ptr, xspi->regs + XSPI_TXD_OFFSET); - xspi->tx_ptr++; -} - -static void xspi_tx16(struct xilinx_spi *xspi) -{ - xspi->write_fn(*(u16 *)(xspi->tx_ptr), xspi->regs + XSPI_TXD_OFFSET); - xspi->tx_ptr += 2; -} + u32 data = 0; -static void xspi_tx32(struct xilinx_spi *xspi) -{ - xspi->write_fn(*(u32 *)(xspi->tx_ptr), xspi->regs + XSPI_TXD_OFFSET); - xspi->tx_ptr += 4; -} - -static void xspi_rx8(struct xilinx_spi *xspi) -{ - u32 data = xspi->read_fn(xspi->regs + XSPI_RXD_OFFSET); - if (xspi->rx_ptr) { - *xspi->rx_ptr = data & 0xff; - xspi->rx_ptr++; + if (!xspi->tx_ptr) { + xspi->write_fn(0, xspi->regs + XSPI_TXD_OFFSET); + return; } -} -static void xspi_rx16(struct xilinx_spi *xspi) -{ - u32 data = xspi->read_fn(xspi->regs + XSPI_RXD_OFFSET); - if (xspi->rx_ptr) { - *(u16 *)(xspi->rx_ptr) = data & 0xffff; - xspi->rx_ptr += 2; + switch (xspi->bytes_per_word) { + case 1: + data = *(u8 *)(xspi->tx_ptr); + break; + case 2: + data = *(u16 *)(xspi->tx_ptr); + break; + case 4: + data = *(u32 *)(xspi->tx_ptr); + break; } + + xspi->write_fn(data, xspi->regs + XSPI_TXD_OFFSET); + xspi->tx_ptr += xspi->bytes_per_word; } -static void xspi_rx32(struct xilinx_spi *xspi) +static void xilinx_spi_rx(struct xilinx_spi *xspi) { u32 data = xspi->read_fn(xspi->regs + XSPI_RXD_OFFSET); - if (xspi->rx_ptr) { + + if (!xspi->rx_ptr) + return; + + switch (xspi->bytes_per_word) { + case 1: + *(u8 *)(xspi->rx_ptr) = data; + break; + case 2: + *(u16 *)(xspi->rx_ptr) = data; + break; + case 4: *(u32 *)(xspi->rx_ptr) = data; - xspi->rx_ptr += 4; + break; } + + xspi->rx_ptr += xspi->bytes_per_word; } static void xspi_init_hw(struct xilinx_spi *xspi) @@ -165,46 +169,56 @@ static void xspi_init_hw(struct xilinx_spi *xspi) /* Reset the SPI device */ xspi->write_fn(XIPIF_V123B_RESET_MASK, regs_base + XIPIF_V123B_RESETR_OFFSET); - /* Disable all the interrupts just in case */ - xspi->write_fn(0, regs_base + XIPIF_V123B_IIER_OFFSET); - /* Enable the global IPIF interrupt */ - xspi->write_fn(XIPIF_V123B_GINTR_ENABLE, - regs_base + XIPIF_V123B_DGIER_OFFSET); + /* Enable the transmit empty interrupt, which we use to determine + * progress on the transmission. + */ + xspi->write_fn(XSPI_INTR_TX_EMPTY, + regs_base + XIPIF_V123B_IIER_OFFSET); + /* Disable the global IPIF interrupt */ + xspi->write_fn(0, regs_base + XIPIF_V123B_DGIER_OFFSET); /* Deselect the slave on the SPI bus */ xspi->write_fn(0xffff, regs_base + XSPI_SSR_OFFSET); /* Disable the transmitter, enable Manual Slave Select Assertion, * put SPI controller into master mode, and enable it */ - xspi->write_fn(XSPI_CR_TRANS_INHIBIT | XSPI_CR_MANUAL_SSELECT | - XSPI_CR_MASTER_MODE | XSPI_CR_ENABLE | XSPI_CR_TXFIFO_RESET | - XSPI_CR_RXFIFO_RESET, regs_base + XSPI_CR_OFFSET); + xspi->write_fn(XSPI_CR_MANUAL_SSELECT | XSPI_CR_MASTER_MODE | + XSPI_CR_ENABLE | XSPI_CR_TXFIFO_RESET | XSPI_CR_RXFIFO_RESET, + regs_base + XSPI_CR_OFFSET); } static void xilinx_spi_chipselect(struct spi_device *spi, int is_on) { struct xilinx_spi *xspi = spi_master_get_devdata(spi->master); + u16 cr; + u32 cs; if (is_on == BITBANG_CS_INACTIVE) { /* Deselect the slave on the SPI bus */ - xspi->write_fn(0xffff, xspi->regs + XSPI_SSR_OFFSET); - } else if (is_on == BITBANG_CS_ACTIVE) { - /* Set the SPI clock phase and polarity */ - u16 cr = xspi->read_fn(xspi->regs + XSPI_CR_OFFSET) - & ~XSPI_CR_MODE_MASK; - if (spi->mode & SPI_CPHA) - cr |= XSPI_CR_CPHA; - if (spi->mode & SPI_CPOL) - cr |= XSPI_CR_CPOL; - xspi->write_fn(cr, xspi->regs + XSPI_CR_OFFSET); - - /* We do not check spi->max_speed_hz here as the SPI clock - * frequency is not software programmable (the IP block design - * parameter) - */ - - /* Activate the chip select */ - xspi->write_fn(~(0x0001 << spi->chip_select), - xspi->regs + XSPI_SSR_OFFSET); + xspi->write_fn(xspi->cs_inactive, xspi->regs + XSPI_SSR_OFFSET); + return; } + + /* Set the SPI clock phase and polarity */ + cr = xspi->read_fn(xspi->regs + XSPI_CR_OFFSET) & ~XSPI_CR_MODE_MASK; + if (spi->mode & SPI_CPHA) + cr |= XSPI_CR_CPHA; + if (spi->mode & SPI_CPOL) + cr |= XSPI_CR_CPOL; + if (spi->mode & SPI_LSB_FIRST) + cr |= XSPI_CR_LSB_FIRST; + if (spi->mode & SPI_LOOP) + cr |= XSPI_CR_LOOP; + xspi->write_fn(cr, xspi->regs + XSPI_CR_OFFSET); + + /* We do not check spi->max_speed_hz here as the SPI clock + * frequency is not software programmable (the IP block design + * parameter) + */ + + cs = xspi->cs_inactive; + cs ^= BIT(spi->chip_select); + + /* Activate the chip select */ + xspi->write_fn(cs, xspi->regs + XSPI_SSR_OFFSET); } /* spi_bitbang requires custom setup_transfer() to be defined if there is a @@ -213,85 +227,85 @@ static void xilinx_spi_chipselect(struct spi_device *spi, int is_on) static int xilinx_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t) { - return 0; -} + struct xilinx_spi *xspi = spi_master_get_devdata(spi->master); -static void xilinx_spi_fill_tx_fifo(struct xilinx_spi *xspi) -{ - u8 sr; + if (spi->mode & SPI_CS_HIGH) + xspi->cs_inactive &= ~BIT(spi->chip_select); + else + xspi->cs_inactive |= BIT(spi->chip_select); - /* Fill the Tx FIFO with as many bytes as possible */ - sr = xspi->read_fn(xspi->regs + XSPI_SR_OFFSET); - while ((sr & XSPI_SR_TX_FULL_MASK) == 0 && xspi->remaining_bytes > 0) { - if (xspi->tx_ptr) - xspi->tx_fn(xspi); - else - xspi->write_fn(0, xspi->regs + XSPI_TXD_OFFSET); - xspi->remaining_bytes -= xspi->bits_per_word / 8; - sr = xspi->read_fn(xspi->regs + XSPI_SR_OFFSET); - } + return 0; } static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) { struct xilinx_spi *xspi = spi_master_get_devdata(spi->master); - u32 ipif_ier; + int remaining_words; /* the number of words left to transfer */ + bool use_irq = false; + u16 cr = 0; /* We get here with transmitter inhibited */ xspi->tx_ptr = t->tx_buf; xspi->rx_ptr = t->rx_buf; - xspi->remaining_bytes = t->len; + remaining_words = t->len / xspi->bytes_per_word; reinit_completion(&xspi->done); + if (xspi->irq >= 0 && remaining_words > xspi->buffer_size) { + use_irq = true; + xspi->write_fn(XSPI_INTR_TX_EMPTY, + xspi->regs + XIPIF_V123B_IISR_OFFSET); + /* Enable the global IPIF interrupt */ + xspi->write_fn(XIPIF_V123B_GINTR_ENABLE, + xspi->regs + XIPIF_V123B_DGIER_OFFSET); + /* Inhibit irq to avoid spurious irqs on tx_empty*/ + cr = xspi->read_fn(xspi->regs + XSPI_CR_OFFSET); + xspi->write_fn(cr | XSPI_CR_TRANS_INHIBIT, + xspi->regs + XSPI_CR_OFFSET); + } - /* Enable the transmit empty interrupt, which we use to determine - * progress on the transmission. - */ - ipif_ier = xspi->read_fn(xspi->regs + XIPIF_V123B_IIER_OFFSET); - xspi->write_fn(ipif_ier | XSPI_INTR_TX_EMPTY, - xspi->regs + XIPIF_V123B_IIER_OFFSET); + while (remaining_words) { + int n_words, tx_words, rx_words; - for (;;) { - u16 cr; - u8 sr; + n_words = min(remaining_words, xspi->buffer_size); - xilinx_spi_fill_tx_fifo(xspi); + tx_words = n_words; + while (tx_words--) + xilinx_spi_tx(xspi); /* Start the transfer by not inhibiting the transmitter any * longer */ - cr = xspi->read_fn(xspi->regs + XSPI_CR_OFFSET) & - ~XSPI_CR_TRANS_INHIBIT; - xspi->write_fn(cr, xspi->regs + XSPI_CR_OFFSET); - wait_for_completion(&xspi->done); + if (use_irq) { + xspi->write_fn(cr, xspi->regs + XSPI_CR_OFFSET); + wait_for_completion(&xspi->done); + } else + while (!(xspi->read_fn(xspi->regs + XSPI_SR_OFFSET) & + XSPI_SR_TX_EMPTY_MASK)) + ; /* A transmit has just completed. Process received data and * check for more data to transmit. Always inhibit the * transmitter while the Isr refills the transmit register/FIFO, * or make sure it is stopped if we're done. */ - cr = xspi->read_fn(xspi->regs + XSPI_CR_OFFSET); - xspi->write_fn(cr | XSPI_CR_TRANS_INHIBIT, + if (use_irq) + xspi->write_fn(cr | XSPI_CR_TRANS_INHIBIT, xspi->regs + XSPI_CR_OFFSET); /* Read out all the data from the Rx FIFO */ - sr = xspi->read_fn(xspi->regs + XSPI_SR_OFFSET); - while ((sr & XSPI_SR_RX_EMPTY_MASK) == 0) { - xspi->rx_fn(xspi); - sr = xspi->read_fn(xspi->regs + XSPI_SR_OFFSET); - } - - /* See if there is more data to send */ - if (xspi->remaining_bytes <= 0) - break; + rx_words = n_words; + while (rx_words--) + xilinx_spi_rx(xspi); + + remaining_words -= n_words; } - /* Disable the transmit empty interrupt */ - xspi->write_fn(ipif_ier, xspi->regs + XIPIF_V123B_IIER_OFFSET); + if (use_irq) + xspi->write_fn(0, xspi->regs + XIPIF_V123B_DGIER_OFFSET); - return t->len - xspi->remaining_bytes; + return t->len; } @@ -316,6 +330,28 @@ static irqreturn_t xilinx_spi_irq(int irq, void *dev_id) return IRQ_HANDLED; } +static int xilinx_spi_find_buffer_size(struct xilinx_spi *xspi) +{ + u8 sr; + int n_words = 0; + + /* + * Before the buffer_size detection we reset the core + * to make sure we start with a clean state. + */ + xspi->write_fn(XIPIF_V123B_RESET_MASK, + xspi->regs + XIPIF_V123B_RESETR_OFFSET); + + /* Fill the Tx FIFO with as many words as possible */ + do { + xspi->write_fn(0, xspi->regs + XSPI_TXD_OFFSET); + sr = xspi->read_fn(xspi->regs + XSPI_SR_OFFSET); + n_words++; + } while (!(sr & XSPI_SR_TX_FULL_MASK)); + + return n_words; +} + static const struct of_device_id xilinx_spi_of_match[] = { { .compatible = "xlnx,xps-spi-2.00.a", }, { .compatible = "xlnx,xps-spi-2.00.b", }, @@ -348,14 +384,21 @@ static int xilinx_spi_probe(struct platform_device *pdev) return -EINVAL; } + if (num_cs > XILINX_SPI_MAX_CS) { + dev_err(&pdev->dev, "Invalid number of spi slaves\n"); + return -EINVAL; + } + master = spi_alloc_master(&pdev->dev, sizeof(struct xilinx_spi)); if (!master) return -ENODEV; /* the spi->mode bits understood by this driver: */ - master->mode_bits = SPI_CPOL | SPI_CPHA; + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST | SPI_LOOP | + SPI_CS_HIGH; xspi = spi_master_get_devdata(master); + xspi->cs_inactive = 0xffffffff; xspi->bitbang.master = master; xspi->bitbang.chipselect = xilinx_spi_chipselect; xspi->bitbang.setup_transfer = xilinx_spi_setup_transfer; @@ -392,35 +435,20 @@ static int xilinx_spi_probe(struct platform_device *pdev) } master->bits_per_word_mask = SPI_BPW_MASK(bits_per_word); - xspi->bits_per_word = bits_per_word; - if (xspi->bits_per_word == 8) { - xspi->tx_fn = xspi_tx8; - xspi->rx_fn = xspi_rx8; - } else if (xspi->bits_per_word == 16) { - xspi->tx_fn = xspi_tx16; - xspi->rx_fn = xspi_rx16; - } else if (xspi->bits_per_word == 32) { - xspi->tx_fn = xspi_tx32; - xspi->rx_fn = xspi_rx32; - } else { - ret = -EINVAL; - goto put_master; - } - - /* SPI controller initializations */ - xspi_init_hw(xspi); + xspi->bytes_per_word = bits_per_word / 8; + xspi->buffer_size = xilinx_spi_find_buffer_size(xspi); xspi->irq = platform_get_irq(pdev, 0); - if (xspi->irq < 0) { - ret = xspi->irq; - goto put_master; + if (xspi->irq >= 0) { + /* Register for SPI Interrupt */ + ret = devm_request_irq(&pdev->dev, xspi->irq, xilinx_spi_irq, 0, + dev_name(&pdev->dev), xspi); + if (ret) + goto put_master; } - /* Register for SPI Interrupt */ - ret = devm_request_irq(&pdev->dev, xspi->irq, xilinx_spi_irq, 0, - dev_name(&pdev->dev), xspi); - if (ret) - goto put_master; + /* SPI controller initializations */ + xspi_init_hw(xspi); ret = spi_bitbang_start(&xspi->bitbang); if (ret) { diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 66a70e9bc743..c64a3e59fce3 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -13,10 +13,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/kernel.h> @@ -788,7 +784,7 @@ static int spi_transfer_one_message(struct spi_master *master, struct spi_transfer *xfer; bool keep_cs = false; int ret = 0; - int ms = 1; + unsigned long ms = 1; spi_set_cs(msg->spi, true); @@ -875,31 +871,59 @@ void spi_finalize_current_transfer(struct spi_master *master) EXPORT_SYMBOL_GPL(spi_finalize_current_transfer); /** - * spi_pump_messages - kthread work function which processes spi message queue - * @work: pointer to kthread work struct contained in the master struct + * __spi_pump_messages - function which processes spi message queue + * @master: master to process queue for + * @in_kthread: true if we are in the context of the message pump thread * * This function checks if there is any spi message in the queue that * needs processing and if so call out to the driver to initialize hardware * and transfer each message. * + * Note that it is called both from the kthread itself and also from + * inside spi_sync(); the queue extraction handling at the top of the + * function should deal with this safely. */ -static void spi_pump_messages(struct kthread_work *work) +static void __spi_pump_messages(struct spi_master *master, bool in_kthread) { - struct spi_master *master = - container_of(work, struct spi_master, pump_messages); unsigned long flags; bool was_busy = false; int ret; - /* Lock queue and check for queue work */ + /* Lock queue */ spin_lock_irqsave(&master->queue_lock, flags); + + /* Make sure we are not already running a message */ + if (master->cur_msg) { + spin_unlock_irqrestore(&master->queue_lock, flags); + return; + } + + /* If another context is idling the device then defer */ + if (master->idling) { + queue_kthread_work(&master->kworker, &master->pump_messages); + spin_unlock_irqrestore(&master->queue_lock, flags); + return; + } + + /* Check if the queue is idle */ if (list_empty(&master->queue) || !master->running) { if (!master->busy) { spin_unlock_irqrestore(&master->queue_lock, flags); return; } + + /* Only do teardown in the thread */ + if (!in_kthread) { + queue_kthread_work(&master->kworker, + &master->pump_messages); + spin_unlock_irqrestore(&master->queue_lock, flags); + return; + } + master->busy = false; + master->idling = true; spin_unlock_irqrestore(&master->queue_lock, flags); + kfree(master->dummy_rx); master->dummy_rx = NULL; kfree(master->dummy_tx); @@ -913,14 +937,13 @@ static void spi_pump_messages(struct kthread_work *work) pm_runtime_put_autosuspend(master->dev.parent); } trace_spi_master_idle(master); - return; - } - /* Make sure we are not already running a message */ - if (master->cur_msg) { + spin_lock_irqsave(&master->queue_lock, flags); + master->idling = false; spin_unlock_irqrestore(&master->queue_lock, flags); return; } + /* Extract head of queue */ master->cur_msg = list_first_entry(&master->queue, struct spi_message, queue); @@ -985,13 +1008,22 @@ static void spi_pump_messages(struct kthread_work *work) } } +/** + * spi_pump_messages - kthread work function which processes spi message queue + * @work: pointer to kthread work struct contained in the master struct + */ +static void spi_pump_messages(struct kthread_work *work) +{ + struct spi_master *master = + container_of(work, struct spi_master, pump_messages); + + __spi_pump_messages(master, true); +} + static int spi_init_queue(struct spi_master *master) { struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; - INIT_LIST_HEAD(&master->queue); - spin_lock_init(&master->queue_lock); - master->running = false; master->busy = false; @@ -1161,12 +1193,9 @@ static int spi_destroy_queue(struct spi_master *master) return 0; } -/** - * spi_queued_transfer - transfer function for queued transfers - * @spi: spi device which is requesting transfer - * @msg: spi message which is to handled is queued to driver queue - */ -static int spi_queued_transfer(struct spi_device *spi, struct spi_message *msg) +static int __spi_queued_transfer(struct spi_device *spi, + struct spi_message *msg, + bool need_pump) { struct spi_master *master = spi->master; unsigned long flags; @@ -1181,13 +1210,23 @@ static int spi_queued_transfer(struct spi_device *spi, struct spi_message *msg) msg->status = -EINPROGRESS; list_add_tail(&msg->queue, &master->queue); - if (!master->busy) + if (!master->busy && need_pump) queue_kthread_work(&master->kworker, &master->pump_messages); spin_unlock_irqrestore(&master->queue_lock, flags); return 0; } +/** + * spi_queued_transfer - transfer function for queued transfers + * @spi: spi device which is requesting transfer + * @msg: spi message which is to handled is queued to driver queue + */ +static int spi_queued_transfer(struct spi_device *spi, struct spi_message *msg) +{ + return __spi_queued_transfer(spi, msg, true); +} + static int spi_master_initialize_queue(struct spi_master *master) { int ret; @@ -1609,6 +1648,8 @@ int spi_register_master(struct spi_master *master) dynamic = 1; } + INIT_LIST_HEAD(&master->queue); + spin_lock_init(&master->queue_lock); spin_lock_init(&master->bus_lock_spinlock); mutex_init(&master->bus_lock_mutex); master->bus_lock_flag = 0; @@ -2114,19 +2155,46 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message, DECLARE_COMPLETION_ONSTACK(done); int status; struct spi_master *master = spi->master; + unsigned long flags; + + status = __spi_validate(spi, message); + if (status != 0) + return status; message->complete = spi_complete; message->context = &done; + message->spi = spi; if (!bus_locked) mutex_lock(&master->bus_lock_mutex); - status = spi_async_locked(spi, message); + /* If we're not using the legacy transfer method then we will + * try to transfer in the calling context so special case. + * This code would be less tricky if we could remove the + * support for driver implemented message queues. + */ + if (master->transfer == spi_queued_transfer) { + spin_lock_irqsave(&master->bus_lock_spinlock, flags); + + trace_spi_message_submit(message); + + status = __spi_queued_transfer(spi, message, false); + + spin_unlock_irqrestore(&master->bus_lock_spinlock, flags); + } else { + status = spi_async_locked(spi, message); + } if (!bus_locked) mutex_unlock(&master->bus_lock_mutex); if (status == 0) { + /* Push out the messages in the calling context if we + * can. + */ + if (master->transfer == spi_queued_transfer) + __spi_pump_messages(master, false); + wait_for_completion(&done); status = message->status; } diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 6941e04afb8c..4eb7a980e670 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -14,10 +14,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/init.h> @@ -317,6 +313,37 @@ done: return status; } +static struct spi_ioc_transfer * +spidev_get_ioc_message(unsigned int cmd, struct spi_ioc_transfer __user *u_ioc, + unsigned *n_ioc) +{ + struct spi_ioc_transfer *ioc; + u32 tmp; + + /* Check type, command number and direction */ + if (_IOC_TYPE(cmd) != SPI_IOC_MAGIC + || _IOC_NR(cmd) != _IOC_NR(SPI_IOC_MESSAGE(0)) + || _IOC_DIR(cmd) != _IOC_WRITE) + return ERR_PTR(-ENOTTY); + + tmp = _IOC_SIZE(cmd); + if ((tmp % sizeof(struct spi_ioc_transfer)) != 0) + return ERR_PTR(-EINVAL); + *n_ioc = tmp / sizeof(struct spi_ioc_transfer); + if (*n_ioc == 0) + return NULL; + + /* copy into scratch area */ + ioc = kmalloc(tmp, GFP_KERNEL); + if (!ioc) + return ERR_PTR(-ENOMEM); + if (__copy_from_user(ioc, u_ioc, tmp)) { + kfree(ioc); + return ERR_PTR(-EFAULT); + } + return ioc; +} + static long spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -456,32 +483,15 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) default: /* segmented and/or full-duplex I/O request */ - if (_IOC_NR(cmd) != _IOC_NR(SPI_IOC_MESSAGE(0)) - || _IOC_DIR(cmd) != _IOC_WRITE) { - retval = -ENOTTY; - break; - } - - tmp = _IOC_SIZE(cmd); - if ((tmp % sizeof(struct spi_ioc_transfer)) != 0) { - retval = -EINVAL; - break; - } - n_ioc = tmp / sizeof(struct spi_ioc_transfer); - if (n_ioc == 0) - break; - - /* copy into scratch area */ - ioc = kmalloc(tmp, GFP_KERNEL); - if (!ioc) { - retval = -ENOMEM; - break; - } - if (__copy_from_user(ioc, (void __user *)arg, tmp)) { - kfree(ioc); - retval = -EFAULT; + /* Check message and copy into scratch area */ + ioc = spidev_get_ioc_message(cmd, + (struct spi_ioc_transfer __user *)arg, &n_ioc); + if (IS_ERR(ioc)) { + retval = PTR_ERR(ioc); break; } + if (!ioc) + break; /* n_ioc is also 0 */ /* translate to spi_message, execute */ retval = spidev_message(spidev, ioc, n_ioc); @@ -496,8 +506,67 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) #ifdef CONFIG_COMPAT static long +spidev_compat_ioc_message(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct spi_ioc_transfer __user *u_ioc; + int retval = 0; + struct spidev_data *spidev; + struct spi_device *spi; + unsigned n_ioc, n; + struct spi_ioc_transfer *ioc; + + u_ioc = (struct spi_ioc_transfer __user *) compat_ptr(arg); + if (!access_ok(VERIFY_READ, u_ioc, _IOC_SIZE(cmd))) + return -EFAULT; + + /* guard against device removal before, or while, + * we issue this ioctl. + */ + spidev = filp->private_data; + spin_lock_irq(&spidev->spi_lock); + spi = spi_dev_get(spidev->spi); + spin_unlock_irq(&spidev->spi_lock); + + if (spi == NULL) + return -ESHUTDOWN; + + /* SPI_IOC_MESSAGE needs the buffer locked "normally" */ + mutex_lock(&spidev->buf_lock); + + /* Check message and copy into scratch area */ + ioc = spidev_get_ioc_message(cmd, u_ioc, &n_ioc); + if (IS_ERR(ioc)) { + retval = PTR_ERR(ioc); + goto done; + } + if (!ioc) + goto done; /* n_ioc is also 0 */ + + /* Convert buffer pointers */ + for (n = 0; n < n_ioc; n++) { + ioc[n].rx_buf = (uintptr_t) compat_ptr(ioc[n].rx_buf); + ioc[n].tx_buf = (uintptr_t) compat_ptr(ioc[n].tx_buf); + } + + /* translate to spi_message, execute */ + retval = spidev_message(spidev, ioc, n_ioc); + kfree(ioc); + +done: + mutex_unlock(&spidev->buf_lock); + spi_dev_put(spi); + return retval; +} + +static long spidev_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + if (_IOC_TYPE(cmd) == SPI_IOC_MAGIC + && _IOC_NR(cmd) == _IOC_NR(SPI_IOC_MESSAGE(0)) + && _IOC_DIR(cmd) == _IOC_WRITE) + return spidev_compat_ioc_message(filp, cmd, arg); + return spidev_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); } #else diff --git a/fs/Kconfig b/fs/Kconfig index 664991afe0c0..a6bb530b1ec5 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -165,6 +165,7 @@ config HUGETLB_PAGE def_bool HUGETLBFS source "fs/configfs/Kconfig" +source "fs/efivarfs/Kconfig" endmenu @@ -209,7 +210,6 @@ source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/exofs/Kconfig" source "fs/f2fs/Kconfig" -source "fs/efivarfs/Kconfig" endif # MISC_FILESYSTEMS diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index a66768ebc8d1..80e9c18ea64f 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -8,6 +8,7 @@ config BTRFS_FS select LZO_DECOMPRESS select RAID6_PQ select XOR_BLOCKS + select SRCU help Btrfs is a general purpose copy-on-write filesystem with extents, diff --git a/fs/efivarfs/Kconfig b/fs/efivarfs/Kconfig index 367bbb10c543..c2499ef174a2 100644 --- a/fs/efivarfs/Kconfig +++ b/fs/efivarfs/Kconfig @@ -1,6 +1,7 @@ config EFIVAR_FS tristate "EFI Variable filesystem" depends on EFI + default m help efivarfs is a replacement filesystem for the old EFI variable support via sysfs, as it doesn't suffer from the diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 6dad1176ec52..ddbce42548c9 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -140,7 +140,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, name[len] = '-'; - efi_guid_unparse(&entry->var.VendorGuid, name + len + 1); + efi_guid_to_str(&entry->var.VendorGuid, name + len + 1); name[len + EFI_VARIABLE_GUID_LEN+1] = '\0'; diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig index 22c629eedd82..2a24249b30af 100644 --- a/fs/notify/Kconfig +++ b/fs/notify/Kconfig @@ -1,5 +1,6 @@ config FSNOTIFY def_bool n + select SRCU source "fs/notify/dnotify/Kconfig" source "fs/notify/inotify/Kconfig" diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig index c51df1dd237e..4a09975aac90 100644 --- a/fs/quota/Kconfig +++ b/fs/quota/Kconfig @@ -5,6 +5,7 @@ config QUOTA bool "Quota support" select QUOTACTL + select SRCU help If you say Y here, you will be able to set per user limits for disk usage (also called disk quotas). Currently, it works for the diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 33063f872ee3..176bf816875e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -385,7 +385,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s /* Is this type a native word size -- useful for atomic operations */ #ifndef __native_word -# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) +# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) #endif /* Compile time object size, -1 for unknown */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 0238d612750e..b674837e2b98 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -848,7 +848,7 @@ efi_guidcmp (efi_guid_t left, efi_guid_t right) } static inline char * -efi_guid_unparse(efi_guid_t *guid, char *out) +efi_guid_to_str(efi_guid_t *guid, char *out) { sprintf(out, "%pUl", guid->b); return out; diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0bebb5c348b8..d36f68b08acc 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -595,7 +595,7 @@ extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); extern void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs *regs, int *rctxp); + struct pt_regs **regs, int *rctxp); static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index a036d058a249..05f6df1fdf5b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -170,6 +170,7 @@ enum hrtimer_base_type { * @clock_was_set: Indicates that clock was set from irq context. * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() + * @in_hrtirq: hrtimer_interrupt() is currently executing * @hres_active: State of high resolution mode * @hang_detected: The last hrtimer interrupt detected a hang * @nr_events: Total number of hrtimer interrupt events @@ -185,6 +186,7 @@ struct hrtimer_cpu_base { unsigned int clock_was_set; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; + int in_hrtirq; int hres_active; int hang_detected; unsigned long nr_events; diff --git a/include/linux/ktime.h b/include/linux/ktime.h index c9d645ad98ff..5fc3d1083071 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -166,7 +166,17 @@ static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2) } #if BITS_PER_LONG < 64 -extern u64 ktime_divns(const ktime_t kt, s64 div); +extern u64 __ktime_divns(const ktime_t kt, s64 div); +static inline u64 ktime_divns(const ktime_t kt, s64 div) +{ + if (__builtin_constant_p(div) && !(div >> 32)) { + u64 ns = kt.tv64; + do_div(ns, div); + return ns; + } else { + return __ktime_divns(kt, div); + } +} #else /* BITS_PER_LONG < 64 */ # define ktime_divns(kt, div) (u64)((kt).tv64 / (div)) #endif @@ -186,6 +196,11 @@ static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier) return ktime_to_us(ktime_sub(later, earlier)); } +static inline s64 ktime_ms_delta(const ktime_t later, const ktime_t earlier) +{ + return ktime_to_ms(ktime_sub(later, earlier)); +} + static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec) { return ktime_add_ns(kt, usec * NSEC_PER_USEC); diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h index 90230d5811c5..3a6490e81b28 100644 --- a/include/linux/osq_lock.h +++ b/include/linux/osq_lock.h @@ -5,8 +5,11 @@ * An MCS like lock especially tailored for optimistic spinning for sleeping * lock implementations (mutex, rwsem, etc). */ - -#define OSQ_UNLOCKED_VAL (0) +struct optimistic_spin_node { + struct optimistic_spin_node *next, *prev; + int locked; /* 1 if lock acquired */ + int cpu; /* encoded CPU # + 1 value */ +}; struct optimistic_spin_queue { /* @@ -16,6 +19,8 @@ struct optimistic_spin_queue { atomic_t tail; }; +#define OSQ_UNLOCKED_VAL (0) + /* Init macro and function. */ #define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) } @@ -24,4 +29,7 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock) atomic_set(&lock->tail, OSQ_UNLOCKED_VAL); } +extern bool osq_lock(struct optimistic_spin_queue *lock); +extern void osq_unlock(struct optimistic_spin_queue *lock); + #endif diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 664de5a4ec46..5cad0e6f3552 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -469,6 +469,7 @@ struct perf_event_context { */ struct mutex mutex; + struct list_head active_ctx_list; struct list_head pinned_groups; struct list_head flexible_groups; struct list_head event_list; @@ -519,7 +520,6 @@ struct perf_cpu_context { int exclusive; struct hrtimer hrtimer; ktime_t hrtimer_interval; - struct list_head rotation_list; struct pmu *unique_pmu; struct perf_cgroup *cgrp; }; @@ -659,6 +659,7 @@ static inline int is_software_event(struct perf_event *event) extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; +extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64); extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); #ifndef perf_arch_fetch_caller_regs @@ -683,14 +684,25 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs) static __always_inline void perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { - struct pt_regs hot_regs; + if (static_key_false(&perf_swevent_enabled[event_id])) + __perf_sw_event(event_id, nr, regs, addr); +} + +DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]); +/* + * 'Special' version for the scheduler, it hard assumes no recursion, + * which is guaranteed by us not actually scheduling inside other swevents + * because those disable preemption. + */ +static __always_inline void +perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) +{ if (static_key_false(&perf_swevent_enabled[event_id])) { - if (!regs) { - perf_fetch_caller_regs(&hot_regs); - regs = &hot_regs; - } - __perf_sw_event(event_id, nr, regs, addr); + struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]); + + perf_fetch_caller_regs(regs); + ___perf_sw_event(event_id, nr, regs, addr); } } @@ -706,7 +718,7 @@ static inline void perf_event_task_sched_in(struct task_struct *prev, static inline void perf_event_task_sched_out(struct task_struct *prev, struct task_struct *next) { - perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); + perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0); if (static_key_false(&perf_sched_events.key)) __perf_event_task_sched_out(prev, next); @@ -817,6 +829,8 @@ static inline int perf_event_refresh(struct perf_event *event, int refresh) static inline void perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } static inline void +perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) { } +static inline void perf_bp_event(struct perf_event *event, void *data) { } static inline int perf_register_guest_info_callbacks diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 77aed9ea1d26..dab545bb66b3 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -37,6 +37,7 @@ #define SSDR (0x10) /* SSP Data Write/Data Read Register */ #define SSTO (0x28) /* SSP Time Out Register */ +#define DDS_RATE (0x28) /* SSP DDS Clock Rate Register (Intel Quark) */ #define SSPSP (0x2C) /* SSP Programmable Serial Protocol */ #define SSTSA (0x30) /* SSP Tx Timeslot Active */ #define SSRSA (0x34) /* SSP Rx Timeslot Active */ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 529bc946f450..a18b16f1dc0e 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -524,11 +524,11 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry_continue_rcu(pos, member) \ - for (pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ - typeof(*(pos)), member); \ + for (pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member); \ pos; \ - pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ - typeof(*(pos)), member)) + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_continue_rcu_bh - iterate over a hlist continuing after current point @@ -536,11 +536,11 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry_continue_rcu_bh(pos, member) \ - for (pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ - typeof(*(pos)), member); \ + for (pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member); \ pos; \ - pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ - typeof(*(pos)), member)) + pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_from_rcu - iterate over a hlist continuing from current point diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ed4f5939a452..78097491cd99 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -331,12 +331,13 @@ static inline void rcu_init_nohz(void) extern struct srcu_struct tasks_rcu_exit_srcu; #define rcu_note_voluntary_context_switch(t) \ do { \ + rcu_all_qs(); \ if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \ ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \ } while (0) #else /* #ifdef CONFIG_TASKS_RCU */ #define TASKS_RCU(x) do { } while (0) -#define rcu_note_voluntary_context_switch(t) do { } while (0) +#define rcu_note_voluntary_context_switch(t) rcu_all_qs() #endif /* #else #ifdef CONFIG_TASKS_RCU */ /** @@ -582,11 +583,11 @@ static inline void rcu_preempt_sleep_check(void) }) #define __rcu_dereference_check(p, c, space) \ ({ \ - typeof(*p) *_________p1 = (typeof(*p) *__force)ACCESS_ONCE(p); \ + /* Dependency order vs. p above. */ \ + typeof(*p) *________p1 = (typeof(*p) *__force)lockless_dereference(p); \ rcu_lockdep_assert(c, "suspicious rcu_dereference_check() usage"); \ rcu_dereference_sparse(p, space); \ - smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ - ((typeof(*p) __force __kernel *)(_________p1)); \ + ((typeof(*p) __force __kernel *)(________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ @@ -603,10 +604,10 @@ static inline void rcu_preempt_sleep_check(void) }) #define __rcu_dereference_index_check(p, c) \ ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ + /* Dependency order vs. p above. */ \ + typeof(p) _________p1 = lockless_dereference(p); \ rcu_lockdep_assert(c, \ "suspicious rcu_dereference_index_check() usage"); \ - smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_________p1); \ }) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 0e5366200154..937edaeb150d 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -92,17 +92,49 @@ static inline void rcu_virt_note_context_switch(int cpu) } /* - * Return the number of grace periods. + * Return the number of grace periods started. */ -static inline long rcu_batches_completed(void) +static inline unsigned long rcu_batches_started(void) { return 0; } /* - * Return the number of bottom-half grace periods. + * Return the number of bottom-half grace periods started. */ -static inline long rcu_batches_completed_bh(void) +static inline unsigned long rcu_batches_started_bh(void) +{ + return 0; +} + +/* + * Return the number of sched grace periods started. + */ +static inline unsigned long rcu_batches_started_sched(void) +{ + return 0; +} + +/* + * Return the number of grace periods completed. + */ +static inline unsigned long rcu_batches_completed(void) +{ + return 0; +} + +/* + * Return the number of bottom-half grace periods completed. + */ +static inline unsigned long rcu_batches_completed_bh(void) +{ + return 0; +} + +/* + * Return the number of sched grace periods completed. + */ +static inline unsigned long rcu_batches_completed_sched(void) { return 0; } @@ -154,7 +186,10 @@ static inline bool rcu_is_watching(void) return true; } - #endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ +static inline void rcu_all_qs(void) +{ +} + #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 52953790dcca..d2e583a6aaca 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -81,9 +81,12 @@ void cond_synchronize_rcu(unsigned long oldstate); extern unsigned long rcutorture_testseq; extern unsigned long rcutorture_vernum; -long rcu_batches_completed(void); -long rcu_batches_completed_bh(void); -long rcu_batches_completed_sched(void); +unsigned long rcu_batches_started(void); +unsigned long rcu_batches_started_bh(void); +unsigned long rcu_batches_started_sched(void); +unsigned long rcu_batches_completed(void); +unsigned long rcu_batches_completed_bh(void); +unsigned long rcu_batches_completed_sched(void); void show_rcu_gp_kthreads(void); void rcu_force_quiescent_state(void); @@ -97,4 +100,6 @@ extern int rcu_scheduler_active __read_mostly; bool rcu_is_watching(void); +void rcu_all_qs(void); + #endif /* __LINUX_RCUTREE_H */ diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 4419b99d8d6e..116655d92269 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -468,7 +468,7 @@ bool regmap_reg_in_ranges(unsigned int reg, * * @reg: Offset of the register within the regmap bank * @lsb: lsb of the register field. - * @reg: msb of the register field. + * @msb: msb of the register field. * @id_size: port size if it has some ports * @id_offset: address offset for each ports */ diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h index 5479394fefce..5dd65acc2a69 100644 --- a/include/linux/regulator/da9211.h +++ b/include/linux/regulator/da9211.h @@ -32,6 +32,8 @@ struct da9211_pdata { * 2 : 2 phase 2 buck */ int num_buck; + int gpio_ren[DA9211_MAX_REGULATORS]; + struct device_node *reg_node[DA9211_MAX_REGULATORS]; struct regulator_init_data *init_data[DA9211_MAX_REGULATORS]; }; #endif diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 5f1e9ca47417..d4ad5b5a02bb 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -21,6 +21,7 @@ struct regmap; struct regulator_dev; +struct regulator_config; struct regulator_init_data; struct regulator_enable_gpio; @@ -205,6 +206,15 @@ enum regulator_type { * @supply_name: Identifying the regulator supply * @of_match: Name used to identify regulator in DT. * @regulators_node: Name of node containing regulator definitions in DT. + * @of_parse_cb: Optional callback called only if of_match is present. + * Will be called for each regulator parsed from DT, during + * init_data parsing. + * The regulator_config passed as argument to the callback will + * be a copy of config passed to regulator_register, valid only + * for this particular call. Callback may freely change the + * config but it cannot store it for later usage. + * Callback should return 0 on success or negative ERRNO + * indicating failure. * @id: Numerical identifier for the regulator. * @ops: Regulator operations table. * @irq: Interrupt number for the regulator. @@ -251,6 +261,9 @@ struct regulator_desc { const char *supply_name; const char *of_match; const char *regulators_node; + int (*of_parse_cb)(struct device_node *, + const struct regulator_desc *, + struct regulator_config *); int id; bool continuous_voltage_range; unsigned n_voltages; diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 0b08d05d470b..b07562e082c4 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -191,15 +191,22 @@ struct regulator_init_data { void *driver_data; /* core does not touch this */ }; -int regulator_suspend_prepare(suspend_state_t state); -int regulator_suspend_finish(void); - #ifdef CONFIG_REGULATOR void regulator_has_full_constraints(void); +int regulator_suspend_prepare(suspend_state_t state); +int regulator_suspend_finish(void); #else static inline void regulator_has_full_constraints(void) { } +static inline int regulator_suspend_prepare(suspend_state_t state) +{ + return 0; +} +static inline int regulator_suspend_finish(void) +{ + return 0; +} #endif #endif diff --git a/include/linux/regulator/mt6397-regulator.h b/include/linux/regulator/mt6397-regulator.h new file mode 100644 index 000000000000..30cc5963e265 --- /dev/null +++ b/include/linux/regulator/mt6397-regulator.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2014 MediaTek Inc. + * Author: Flora Fu <flora.fu@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_REGULATOR_MT6397_H +#define __LINUX_REGULATOR_MT6397_H + +enum { + MT6397_ID_VPCA15 = 0, + MT6397_ID_VPCA7, + MT6397_ID_VSRAMCA15, + MT6397_ID_VSRAMCA7, + MT6397_ID_VCORE, + MT6397_ID_VGPU, + MT6397_ID_VDRM, + MT6397_ID_VIO18 = 7, + MT6397_ID_VTCXO, + MT6397_ID_VA28, + MT6397_ID_VCAMA, + MT6397_ID_VIO28, + MT6397_ID_VUSB, + MT6397_ID_VMC, + MT6397_ID_VMCH, + MT6397_ID_VEMC3V3, + MT6397_ID_VGP1, + MT6397_ID_VGP2, + MT6397_ID_VGP3, + MT6397_ID_VGP4, + MT6397_ID_VGP5, + MT6397_ID_VGP6, + MT6397_ID_VIBR, + MT6397_ID_RG_MAX, +}; + +#define MT6397_MAX_REGULATOR MT6397_ID_RG_MAX +#define MT6397_REGULATOR_ID97 0x97 +#define MT6397_REGULATOR_ID91 0x91 + +#endif /* __LINUX_REGULATOR_MT6397_H */ diff --git a/include/linux/regulator/pfuze100.h b/include/linux/regulator/pfuze100.h index 364f7a7c43db..70c6c66c5bcf 100644 --- a/include/linux/regulator/pfuze100.h +++ b/include/linux/regulator/pfuze100.h @@ -49,6 +49,20 @@ #define PFUZE200_VGEN5 11 #define PFUZE200_VGEN6 12 +#define PFUZE3000_SW1A 0 +#define PFUZE3000_SW1B 1 +#define PFUZE3000_SW2 2 +#define PFUZE3000_SW3 3 +#define PFUZE3000_SWBST 4 +#define PFUZE3000_VSNVS 5 +#define PFUZE3000_VREFDDR 6 +#define PFUZE3000_VLDO1 7 +#define PFUZE3000_VLDO2 8 +#define PFUZE3000_VCCSD 9 +#define PFUZE3000_V33 10 +#define PFUZE3000_VLDO3 11 +#define PFUZE3000_VLDO4 12 + struct regulator_init_data; struct pfuze_regulator_platform_data { diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 6d6be09a2fe5..dcad7ee0d746 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -161,7 +161,7 @@ extern void devm_rtc_device_unregister(struct device *dev, extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs); -extern int rtc_set_ntp_time(struct timespec now); +extern int rtc_set_ntp_time(struct timespec64 now); int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm); extern int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); diff --git a/include/linux/smp.h b/include/linux/smp.h index 93dff5fff524..be91db2a7017 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -151,6 +151,13 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, static inline void kick_all_cpus_sync(void) { } static inline void wake_up_all_idle_cpus(void) { } +#ifdef CONFIG_UP_LATE_INIT +extern void __init up_late_init(void); +static inline void smp_init(void) { up_late_init(); } +#else +static inline void smp_init(void) { } +#endif + #endif /* !SMP */ /* diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h index b2b1afbb3202..cd519a11c2c6 100644 --- a/include/linux/spi/at86rf230.h +++ b/include/linux/spi/at86rf230.h @@ -12,10 +12,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * * Written by: * Dmitry Eremin-Solenikov <dmitry.baryshkov@siemens.com> */ diff --git a/include/linux/spi/l4f00242t03.h b/include/linux/spi/l4f00242t03.h index bc8677c8eba9..e69e9b51b21a 100644 --- a/include/linux/spi/l4f00242t03.h +++ b/include/linux/spi/l4f00242t03.h @@ -12,10 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _INCLUDE_LINUX_SPI_L4F00242T03_H_ diff --git a/include/linux/spi/lms283gf05.h b/include/linux/spi/lms283gf05.h index 555d254e6606..fdd1d1d51da5 100644 --- a/include/linux/spi/lms283gf05.h +++ b/include/linux/spi/lms283gf05.h @@ -11,10 +11,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _INCLUDE_LINUX_SPI_LMS283GF05_H_ diff --git a/include/linux/spi/mxs-spi.h b/include/linux/spi/mxs-spi.h index 4835486f58e5..381d368b91b4 100644 --- a/include/linux/spi/mxs-spi.h +++ b/include/linux/spi/mxs-spi.h @@ -15,10 +15,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #ifndef __LINUX_SPI_MXS_SPI_H__ diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index d5a316550177..6d36dacec4ba 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -10,10 +10,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __linux_pxa2xx_spi_h #define __linux_pxa2xx_spi_h @@ -57,7 +53,6 @@ struct pxa2xx_spi_chip { #if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP) #include <linux/clk.h> -#include <mach/dma.h> extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_master *info); diff --git a/include/linux/spi/rspi.h b/include/linux/spi/rspi.h index e546b2ceb623..a693188cc08b 100644 --- a/include/linux/spi/rspi.h +++ b/include/linux/spi/rspi.h @@ -11,11 +11,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * */ #ifndef __LINUX_SPI_RENESAS_SPI_H__ diff --git a/include/linux/spi/sh_hspi.h b/include/linux/spi/sh_hspi.h index a1121f872ac1..aa0d440ab4f0 100644 --- a/include/linux/spi/sh_hspi.h +++ b/include/linux/spi/sh_hspi.h @@ -9,10 +9,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef SH_HSPI_H #define SH_HSPI_H diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h index 88a14d81c49e..b087a85f5f72 100644 --- a/include/linux/spi/sh_msiof.h +++ b/include/linux/spi/sh_msiof.h @@ -7,6 +7,8 @@ struct sh_msiof_spi_info { u16 num_chipselect; unsigned int dma_tx_id; unsigned int dma_rx_id; + u32 dtdl; + u32 syncdl; }; #endif /* __SPI_SH_MSIOF_H__ */ diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index a6ef2a8e6de4..ed9489d893a4 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -10,10 +10,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __LINUX_SPI_H @@ -260,6 +256,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @pump_messages: work struct for scheduling work to the message pump * @queue_lock: spinlock to syncronise access to message queue * @queue: message queue + * @idling: the device is entering idle state * @cur_msg: the currently in-flight message * @cur_msg_prepared: spi_prepare_message was called for the currently * in-flight message @@ -425,6 +422,7 @@ struct spi_master { spinlock_t queue_lock; struct list_head queue; struct spi_message *cur_msg; + bool idling; bool busy; bool running; bool rt; diff --git a/include/linux/spi/tle62x0.h b/include/linux/spi/tle62x0.h index 60b59187e590..414c6fddfcf0 100644 --- a/include/linux/spi/tle62x0.h +++ b/include/linux/spi/tle62x0.h @@ -12,10 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ struct tle62x0_pdata { diff --git a/include/linux/spi/tsc2005.h b/include/linux/spi/tsc2005.h index 8f721e465e05..563b3b1799a8 100644 --- a/include/linux/spi/tsc2005.h +++ b/include/linux/spi/tsc2005.h @@ -12,11 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * */ #ifndef _LINUX_SPI_TSC2005_H diff --git a/include/linux/srcu.h b/include/linux/srcu.h index a2783cb5d275..9cfd9623fb03 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -45,7 +45,7 @@ struct rcu_batch { #define RCU_BATCH_INIT(name) { NULL, &(name.head) } struct srcu_struct { - unsigned completed; + unsigned long completed; struct srcu_struct_array __percpu *per_cpu_ref; spinlock_t queue_lock; /* protect ->batch_queue, ->running */ bool running; @@ -102,13 +102,11 @@ void process_srcu(struct work_struct *work); * define and init a srcu struct at build time. * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it. */ -#define DEFINE_SRCU(name) \ +#define __DEFINE_SRCU(name, is_static) \ static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ - struct srcu_struct name = __SRCU_STRUCT_INIT(name); - -#define DEFINE_STATIC_SRCU(name) \ - static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ - static struct srcu_struct name = __SRCU_STRUCT_INIT(name); + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) +#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) +#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) /** * call_srcu() - Queue a callback for invocation after an SRCU grace period @@ -135,7 +133,7 @@ int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); void synchronize_srcu(struct srcu_struct *sp); void synchronize_srcu_expedited(struct srcu_struct *sp); -long srcu_batches_completed(struct srcu_struct *sp); +unsigned long srcu_batches_completed(struct srcu_struct *sp); void srcu_barrier(struct srcu_struct *sp); #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 9b63d13ba82b..3eaae4754275 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -33,6 +33,7 @@ extern time64_t ktime_get_real_seconds(void); extern int __getnstimeofday64(struct timespec64 *tv); extern void getnstimeofday64(struct timespec64 *tv); +extern void getboottime64(struct timespec64 *ts); #if BITS_PER_LONG == 64 /** @@ -72,6 +73,11 @@ static inline struct timespec get_monotonic_coarse(void) { return get_monotonic_coarse64(); } + +static inline void getboottime(struct timespec *ts) +{ + return getboottime64(ts); +} #else /** * Deprecated. Use do_settimeofday64(). @@ -129,9 +135,15 @@ static inline struct timespec get_monotonic_coarse(void) { return timespec64_to_timespec(get_monotonic_coarse64()); } -#endif -extern void getboottime(struct timespec *ts); +static inline void getboottime(struct timespec *ts) +{ + struct timespec64 ts64; + + getboottime64(&ts64); + *ts = timespec64_to_timespec(ts64); +} +#endif #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) #define ktime_get_real_ts64(ts) getnstimeofday64(ts) @@ -217,6 +229,11 @@ static inline void get_monotonic_boottime(struct timespec *ts) *ts = ktime_to_timespec(ktime_get_boottime()); } +static inline void get_monotonic_boottime64(struct timespec64 *ts) +{ + *ts = ktime_to_timespec64(ktime_get_boottime()); +} + static inline void timekeeping_clocktai(struct timespec *ts) { *ts = ktime_to_timespec(ktime_get_clocktai()); diff --git a/include/linux/wait.h b/include/linux/wait.h index 37423e0e1379..537d58eea8a0 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -990,6 +990,32 @@ wait_on_bit_io(void *word, int bit, unsigned mode) } /** + * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * @timeout: timeout, in jiffies + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared. This is similar to wait_on_bit(), except also takes a + * timeout parameter. + * + * Returned value will be zero if the bit was cleared before the + * @timeout elapsed, or non-zero if the @timeout elapsed or process + * received a signal and the mode permitted wakeup on that signal. + */ +static inline int +wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout) +{ + might_sleep(); + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_timeout(word, bit, + bit_wait_timeout, + mode, timeout); +} + +/** * wait_on_bit_action - wait for a bit to be cleared * @word: the word being waited on, a kernel virtual address * @bit: the bit of the word being waited on diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 139b5067345b..27609dfcce25 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -763,7 +763,7 @@ perf_trace_##call(void *__data, proto) \ struct ftrace_event_call *event_call = __data; \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_raw_##call *entry; \ - struct pt_regs __regs; \ + struct pt_regs *__regs; \ u64 __addr = 0, __count = 1; \ struct task_struct *__task = NULL; \ struct hlist_head *head; \ @@ -782,18 +782,19 @@ perf_trace_##call(void *__data, proto) \ sizeof(u64)); \ __entry_size -= sizeof(u32); \ \ - perf_fetch_caller_regs(&__regs); \ entry = perf_trace_buf_prepare(__entry_size, \ event_call->event.type, &__regs, &rctx); \ if (!entry) \ return; \ \ + perf_fetch_caller_regs(__regs); \ + \ tstruct \ \ { assign; } \ \ perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ - __count, &__regs, head, __task); \ + __count, __regs, head, __task); \ } /* diff --git a/init/Kconfig b/init/Kconfig index 9afb971497f4..1354ac09b516 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -470,7 +470,6 @@ choice config TREE_RCU bool "Tree-based hierarchical RCU" depends on !PREEMPT && SMP - select IRQ_WORK help This option selects the RCU implementation that is designed for very large SMP system with hundreds or @@ -480,7 +479,6 @@ config TREE_RCU config PREEMPT_RCU bool "Preemptible tree-based hierarchical RCU" depends on PREEMPT - select IRQ_WORK help This option selects the RCU implementation that is designed for very large SMP systems with hundreds or @@ -501,9 +499,17 @@ config TINY_RCU endchoice +config SRCU + bool + help + This option selects the sleepable version of RCU. This version + permits arbitrary sleeping or blocking within RCU read-side critical + sections. + config TASKS_RCU bool "Task_based RCU implementation using voluntary context switch" default n + select SRCU help This option enables a task-based RCU implementation that uses only voluntary context switch (not preemption!), idle, and @@ -668,9 +674,10 @@ config RCU_BOOST config RCU_KTHREAD_PRIO int "Real-time priority to use for RCU worker threads" - range 1 99 - depends on RCU_BOOST - default 1 + range 1 99 if RCU_BOOST + range 0 99 if !RCU_BOOST + default 1 if RCU_BOOST + default 0 if !RCU_BOOST help This option specifies the SCHED_FIFO priority value that will be assigned to the rcuc/n and rcub/n threads and is also the value @@ -1595,6 +1602,7 @@ config PERF_EVENTS depends on HAVE_PERF_EVENTS select ANON_INODES select IRQ_WORK + select SRCU help Enable kernel support for various performance events provided by software and hardware. diff --git a/init/main.c b/init/main.c index 61b993767db5..179ada15d08a 100644 --- a/init/main.c +++ b/init/main.c @@ -87,10 +87,6 @@ #include <asm/sections.h> #include <asm/cacheflush.h> -#ifdef CONFIG_X86_LOCAL_APIC -#include <asm/smp.h> -#endif - static int kernel_init(void *); extern void init_IRQ(void); @@ -351,15 +347,6 @@ __setup("rdinit=", rdinit_setup); #ifndef CONFIG_SMP static const unsigned int setup_max_cpus = NR_CPUS; -#ifdef CONFIG_X86_LOCAL_APIC -static void __init smp_init(void) -{ - APIC_init_uniprocessor(); -} -#else -#define smp_init() do { } while (0) -#endif - static inline void setup_nr_cpu_ids(void) { } static inline void smp_prepare_cpus(unsigned int maxcpus) { } #endif diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 76768ee812b2..08561f1acd13 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -231,6 +231,10 @@ config RWSEM_SPIN_ON_OWNER def_bool y depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW +config LOCK_SPIN_ON_OWNER + def_bool y + depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER + config ARCH_USE_QUEUE_RWLOCK bool diff --git a/kernel/cpu.c b/kernel/cpu.c index 5d220234b3ca..1972b161c61e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -58,22 +58,23 @@ static int cpu_hotplug_disabled; static struct { struct task_struct *active_writer; - struct mutex lock; /* Synchronizes accesses to refcount, */ + /* wait queue to wake up the active_writer */ + wait_queue_head_t wq; + /* verifies that no writer will get active while readers are active */ + struct mutex lock; /* * Also blocks the new readers during * an ongoing cpu hotplug operation. */ - int refcount; - /* And allows lockless put_online_cpus(). */ - atomic_t puts_pending; + atomic_t refcount; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif } cpu_hotplug = { .active_writer = NULL, + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), - .refcount = 0, #ifdef CONFIG_DEBUG_LOCK_ALLOC .dep_map = {.name = "cpu_hotplug.lock" }, #endif @@ -86,15 +87,6 @@ static struct { #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) -static void apply_puts_pending(int max) -{ - int delta; - - if (atomic_read(&cpu_hotplug.puts_pending) >= max) { - delta = atomic_xchg(&cpu_hotplug.puts_pending, 0); - cpu_hotplug.refcount -= delta; - } -} void get_online_cpus(void) { @@ -103,8 +95,7 @@ void get_online_cpus(void) return; cpuhp_lock_acquire_read(); mutex_lock(&cpu_hotplug.lock); - apply_puts_pending(65536); - cpu_hotplug.refcount++; + atomic_inc(&cpu_hotplug.refcount); mutex_unlock(&cpu_hotplug.lock); } EXPORT_SYMBOL_GPL(get_online_cpus); @@ -116,8 +107,7 @@ bool try_get_online_cpus(void) if (!mutex_trylock(&cpu_hotplug.lock)) return false; cpuhp_lock_acquire_tryread(); - apply_puts_pending(65536); - cpu_hotplug.refcount++; + atomic_inc(&cpu_hotplug.refcount); mutex_unlock(&cpu_hotplug.lock); return true; } @@ -125,20 +115,18 @@ EXPORT_SYMBOL_GPL(try_get_online_cpus); void put_online_cpus(void) { + int refcount; + if (cpu_hotplug.active_writer == current) return; - if (!mutex_trylock(&cpu_hotplug.lock)) { - atomic_inc(&cpu_hotplug.puts_pending); - cpuhp_lock_release(); - return; - } - if (WARN_ON(!cpu_hotplug.refcount)) - cpu_hotplug.refcount++; /* try to fix things up */ + refcount = atomic_dec_return(&cpu_hotplug.refcount); + if (WARN_ON(refcount < 0)) /* try to fix things up */ + atomic_inc(&cpu_hotplug.refcount); + + if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq)) + wake_up(&cpu_hotplug.wq); - if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer)) - wake_up_process(cpu_hotplug.active_writer); - mutex_unlock(&cpu_hotplug.lock); cpuhp_lock_release(); } @@ -168,18 +156,20 @@ EXPORT_SYMBOL_GPL(put_online_cpus); */ void cpu_hotplug_begin(void) { - cpu_hotplug.active_writer = current; + DEFINE_WAIT(wait); + cpu_hotplug.active_writer = current; cpuhp_lock_acquire(); + for (;;) { mutex_lock(&cpu_hotplug.lock); - apply_puts_pending(1); - if (likely(!cpu_hotplug.refcount)) - break; - __set_current_state(TASK_UNINTERRUPTIBLE); + prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE); + if (likely(!atomic_read(&cpu_hotplug.refcount))) + break; mutex_unlock(&cpu_hotplug.lock); schedule(); } + finish_wait(&cpu_hotplug.wq, &wait); } void cpu_hotplug_done(void) diff --git a/kernel/events/core.c b/kernel/events/core.c index 19efcf13375a..7f2fbb8b5069 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -872,22 +872,32 @@ void perf_pmu_enable(struct pmu *pmu) pmu->pmu_enable(pmu); } -static DEFINE_PER_CPU(struct list_head, rotation_list); +static DEFINE_PER_CPU(struct list_head, active_ctx_list); /* - * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized - * because they're strictly cpu affine and rotate_start is called with IRQs - * disabled, while rotate_context is called from IRQ context. + * perf_event_ctx_activate(), perf_event_ctx_deactivate(), and + * perf_event_task_tick() are fully serialized because they're strictly cpu + * affine and perf_event_ctx{activate,deactivate} are called with IRQs + * disabled, while perf_event_task_tick is called from IRQ context. */ -static void perf_pmu_rotate_start(struct pmu *pmu) +static void perf_event_ctx_activate(struct perf_event_context *ctx) { - struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - struct list_head *head = this_cpu_ptr(&rotation_list); + struct list_head *head = this_cpu_ptr(&active_ctx_list); WARN_ON(!irqs_disabled()); - if (list_empty(&cpuctx->rotation_list)) - list_add(&cpuctx->rotation_list, head); + WARN_ON(!list_empty(&ctx->active_ctx_list)); + + list_add(&ctx->active_ctx_list, head); +} + +static void perf_event_ctx_deactivate(struct perf_event_context *ctx) +{ + WARN_ON(!irqs_disabled()); + + WARN_ON(list_empty(&ctx->active_ctx_list)); + + list_del_init(&ctx->active_ctx_list); } static void get_ctx(struct perf_event_context *ctx) @@ -907,6 +917,84 @@ static void put_ctx(struct perf_event_context *ctx) } /* + * Because of perf_event::ctx migration in sys_perf_event_open::move_group and + * perf_pmu_migrate_context() we need some magic. + * + * Those places that change perf_event::ctx will hold both + * perf_event_ctx::mutex of the 'old' and 'new' ctx value. + * + * Lock ordering is by mutex address. There is one other site where + * perf_event_context::mutex nests and that is put_event(). But remember that + * that is a parent<->child context relation, and migration does not affect + * children, therefore these two orderings should not interact. + * + * The change in perf_event::ctx does not affect children (as claimed above) + * because the sys_perf_event_open() case will install a new event and break + * the ctx parent<->child relation, and perf_pmu_migrate_context() is only + * concerned with cpuctx and that doesn't have children. + * + * The places that change perf_event::ctx will issue: + * + * perf_remove_from_context(); + * synchronize_rcu(); + * perf_install_in_context(); + * + * to affect the change. The remove_from_context() + synchronize_rcu() should + * quiesce the event, after which we can install it in the new location. This + * means that only external vectors (perf_fops, prctl) can perturb the event + * while in transit. Therefore all such accessors should also acquire + * perf_event_context::mutex to serialize against this. + * + * However; because event->ctx can change while we're waiting to acquire + * ctx->mutex we must be careful and use the below perf_event_ctx_lock() + * function. + * + * Lock order: + * task_struct::perf_event_mutex + * perf_event_context::mutex + * perf_event_context::lock + * perf_event::child_mutex; + * perf_event::mmap_mutex + * mmap_sem + */ +static struct perf_event_context * +perf_event_ctx_lock_nested(struct perf_event *event, int nesting) +{ + struct perf_event_context *ctx; + +again: + rcu_read_lock(); + ctx = ACCESS_ONCE(event->ctx); + if (!atomic_inc_not_zero(&ctx->refcount)) { + rcu_read_unlock(); + goto again; + } + rcu_read_unlock(); + + mutex_lock_nested(&ctx->mutex, nesting); + if (event->ctx != ctx) { + mutex_unlock(&ctx->mutex); + put_ctx(ctx); + goto again; + } + + return ctx; +} + +static inline struct perf_event_context * +perf_event_ctx_lock(struct perf_event *event) +{ + return perf_event_ctx_lock_nested(event, 0); +} + +static void perf_event_ctx_unlock(struct perf_event *event, + struct perf_event_context *ctx) +{ + mutex_unlock(&ctx->mutex); + put_ctx(ctx); +} + +/* * This must be done under the ctx->lock, such as to serialize against * context_equiv(), therefore we cannot call put_ctx() since that might end up * calling scheduler related locks and ctx->lock nests inside those. @@ -1155,8 +1243,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) ctx->nr_branch_stack++; list_add_rcu(&event->event_entry, &ctx->event_list); - if (!ctx->nr_events) - perf_pmu_rotate_start(ctx->pmu); ctx->nr_events++; if (event->attr.inherit_stat) ctx->nr_stat++; @@ -1275,6 +1361,8 @@ static void perf_group_attach(struct perf_event *event) if (group_leader == event) return; + WARN_ON_ONCE(group_leader->ctx != event->ctx); + if (group_leader->group_flags & PERF_GROUP_SOFTWARE && !is_software_event(event)) group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; @@ -1296,6 +1384,10 @@ static void list_del_event(struct perf_event *event, struct perf_event_context *ctx) { struct perf_cpu_context *cpuctx; + + WARN_ON_ONCE(event->ctx != ctx); + lockdep_assert_held(&ctx->lock); + /* * We can have double detach due to exit/hot-unplug + close. */ @@ -1380,6 +1472,8 @@ static void perf_group_detach(struct perf_event *event) /* Inherit group flags from the previous leader */ sibling->group_flags = event->group_flags; + + WARN_ON_ONCE(sibling->ctx != event->ctx); } out: @@ -1442,6 +1536,10 @@ event_sched_out(struct perf_event *event, { u64 tstamp = perf_event_time(event); u64 delta; + + WARN_ON_ONCE(event->ctx != ctx); + lockdep_assert_held(&ctx->lock); + /* * An event which could not be activated because of * filter mismatch still needs to have its timings @@ -1471,7 +1569,8 @@ event_sched_out(struct perf_event *event, if (!is_software_event(event)) cpuctx->active_oncpu--; - ctx->nr_active--; + if (!--ctx->nr_active) + perf_event_ctx_deactivate(ctx); if (event->attr.freq && event->attr.sample_freq) ctx->nr_freq--; if (event->attr.exclusive || !cpuctx->active_oncpu) @@ -1654,7 +1753,7 @@ int __perf_event_disable(void *info) * is the current context on this CPU and preemption is disabled, * hence we can't get into perf_event_task_sched_out for this context. */ -void perf_event_disable(struct perf_event *event) +static void _perf_event_disable(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; struct task_struct *task = ctx->task; @@ -1695,6 +1794,19 @@ retry: } raw_spin_unlock_irq(&ctx->lock); } + +/* + * Strictly speaking kernel users cannot create groups and therefore this + * interface does not need the perf_event_ctx_lock() magic. + */ +void perf_event_disable(struct perf_event *event) +{ + struct perf_event_context *ctx; + + ctx = perf_event_ctx_lock(event); + _perf_event_disable(event); + perf_event_ctx_unlock(event, ctx); +} EXPORT_SYMBOL_GPL(perf_event_disable); static void perf_set_shadow_time(struct perf_event *event, @@ -1782,7 +1894,8 @@ event_sched_in(struct perf_event *event, if (!is_software_event(event)) cpuctx->active_oncpu++; - ctx->nr_active++; + if (!ctx->nr_active++) + perf_event_ctx_activate(ctx); if (event->attr.freq && event->attr.sample_freq) ctx->nr_freq++; @@ -2158,7 +2271,7 @@ unlock: * perf_event_for_each_child or perf_event_for_each as described * for perf_event_disable. */ -void perf_event_enable(struct perf_event *event) +static void _perf_event_enable(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; struct task_struct *task = ctx->task; @@ -2214,9 +2327,21 @@ retry: out: raw_spin_unlock_irq(&ctx->lock); } + +/* + * See perf_event_disable(); + */ +void perf_event_enable(struct perf_event *event) +{ + struct perf_event_context *ctx; + + ctx = perf_event_ctx_lock(event); + _perf_event_enable(event); + perf_event_ctx_unlock(event, ctx); +} EXPORT_SYMBOL_GPL(perf_event_enable); -int perf_event_refresh(struct perf_event *event, int refresh) +static int _perf_event_refresh(struct perf_event *event, int refresh) { /* * not supported on inherited events @@ -2225,10 +2350,25 @@ int perf_event_refresh(struct perf_event *event, int refresh) return -EINVAL; atomic_add(refresh, &event->event_limit); - perf_event_enable(event); + _perf_event_enable(event); return 0; } + +/* + * See perf_event_disable() + */ +int perf_event_refresh(struct perf_event *event, int refresh) +{ + struct perf_event_context *ctx; + int ret; + + ctx = perf_event_ctx_lock(event); + ret = _perf_event_refresh(event, refresh); + perf_event_ctx_unlock(event, ctx); + + return ret; +} EXPORT_SYMBOL_GPL(perf_event_refresh); static void ctx_sched_out(struct perf_event_context *ctx, @@ -2612,12 +2752,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, perf_pmu_enable(ctx->pmu); perf_ctx_unlock(cpuctx, ctx); - - /* - * Since these rotations are per-cpu, we need to ensure the - * cpu-context we got scheduled on is actually rotating. - */ - perf_pmu_rotate_start(ctx->pmu); } /* @@ -2905,25 +3039,18 @@ static void rotate_ctx(struct perf_event_context *ctx) list_rotate_left(&ctx->flexible_groups); } -/* - * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized - * because they're strictly cpu affine and rotate_start is called with IRQs - * disabled, while rotate_context is called from IRQ context. - */ static int perf_rotate_context(struct perf_cpu_context *cpuctx) { struct perf_event_context *ctx = NULL; - int rotate = 0, remove = 1; + int rotate = 0; if (cpuctx->ctx.nr_events) { - remove = 0; if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) rotate = 1; } ctx = cpuctx->task_ctx; if (ctx && ctx->nr_events) { - remove = 0; if (ctx->nr_events != ctx->nr_active) rotate = 1; } @@ -2947,8 +3074,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx) perf_pmu_enable(cpuctx->ctx.pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); done: - if (remove) - list_del_init(&cpuctx->rotation_list); return rotate; } @@ -2966,9 +3091,8 @@ bool perf_event_can_stop_tick(void) void perf_event_task_tick(void) { - struct list_head *head = this_cpu_ptr(&rotation_list); - struct perf_cpu_context *cpuctx, *tmp; - struct perf_event_context *ctx; + struct list_head *head = this_cpu_ptr(&active_ctx_list); + struct perf_event_context *ctx, *tmp; int throttled; WARN_ON(!irqs_disabled()); @@ -2976,14 +3100,8 @@ void perf_event_task_tick(void) __this_cpu_inc(perf_throttled_seq); throttled = __this_cpu_xchg(perf_throttled_count, 0); - list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { - ctx = &cpuctx->ctx; + list_for_each_entry_safe(ctx, tmp, head, active_ctx_list) perf_adjust_freq_unthr_context(ctx, throttled); - - ctx = cpuctx->task_ctx; - if (ctx) - perf_adjust_freq_unthr_context(ctx, throttled); - } } static int event_enable_on_exec(struct perf_event *event, @@ -3142,6 +3260,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx) { raw_spin_lock_init(&ctx->lock); mutex_init(&ctx->mutex); + INIT_LIST_HEAD(&ctx->active_ctx_list); INIT_LIST_HEAD(&ctx->pinned_groups); INIT_LIST_HEAD(&ctx->flexible_groups); INIT_LIST_HEAD(&ctx->event_list); @@ -3421,7 +3540,16 @@ static void perf_remove_from_owner(struct perf_event *event) rcu_read_unlock(); if (owner) { - mutex_lock(&owner->perf_event_mutex); + /* + * If we're here through perf_event_exit_task() we're already + * holding ctx->mutex which would be an inversion wrt. the + * normal lock order. + * + * However we can safely take this lock because its the child + * ctx->mutex. + */ + mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING); + /* * We have to re-check the event->owner field, if it is cleared * we raced with perf_event_exit_task(), acquiring the mutex @@ -3440,7 +3568,7 @@ static void perf_remove_from_owner(struct perf_event *event) */ static void put_event(struct perf_event *event) { - struct perf_event_context *ctx = event->ctx; + struct perf_event_context *ctx; if (!atomic_long_dec_and_test(&event->refcount)) return; @@ -3448,7 +3576,6 @@ static void put_event(struct perf_event *event) if (!is_kernel_event(event)) perf_remove_from_owner(event); - WARN_ON_ONCE(ctx->parent_ctx); /* * There are two ways this annotation is useful: * @@ -3461,7 +3588,8 @@ static void put_event(struct perf_event *event) * the last filedesc died, so there is no possibility * to trigger the AB-BA case. */ - mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); + ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING); + WARN_ON_ONCE(ctx->parent_ctx); perf_remove_from_context(event, true); mutex_unlock(&ctx->mutex); @@ -3547,12 +3675,13 @@ static int perf_event_read_group(struct perf_event *event, u64 read_format, char __user *buf) { struct perf_event *leader = event->group_leader, *sub; - int n = 0, size = 0, ret = -EFAULT; struct perf_event_context *ctx = leader->ctx; - u64 values[5]; + int n = 0, size = 0, ret; u64 count, enabled, running; + u64 values[5]; + + lockdep_assert_held(&ctx->mutex); - mutex_lock(&ctx->mutex); count = perf_event_read_value(leader, &enabled, &running); values[n++] = 1 + leader->nr_siblings; @@ -3567,7 +3696,7 @@ static int perf_event_read_group(struct perf_event *event, size = n * sizeof(u64); if (copy_to_user(buf, values, size)) - goto unlock; + return -EFAULT; ret = size; @@ -3581,14 +3710,11 @@ static int perf_event_read_group(struct perf_event *event, size = n * sizeof(u64); if (copy_to_user(buf + ret, values, size)) { - ret = -EFAULT; - goto unlock; + return -EFAULT; } ret += size; } -unlock: - mutex_unlock(&ctx->mutex); return ret; } @@ -3660,8 +3786,14 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct perf_event *event = file->private_data; + struct perf_event_context *ctx; + int ret; - return perf_read_hw(event, buf, count); + ctx = perf_event_ctx_lock(event); + ret = perf_read_hw(event, buf, count); + perf_event_ctx_unlock(event, ctx); + + return ret; } static unsigned int perf_poll(struct file *file, poll_table *wait) @@ -3687,7 +3819,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) return events; } -static void perf_event_reset(struct perf_event *event) +static void _perf_event_reset(struct perf_event *event) { (void)perf_event_read(event); local64_set(&event->count, 0); @@ -3706,6 +3838,7 @@ static void perf_event_for_each_child(struct perf_event *event, struct perf_event *child; WARN_ON_ONCE(event->ctx->parent_ctx); + mutex_lock(&event->child_mutex); func(event); list_for_each_entry(child, &event->child_list, child_list) @@ -3719,14 +3852,13 @@ static void perf_event_for_each(struct perf_event *event, struct perf_event_context *ctx = event->ctx; struct perf_event *sibling; - WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); + lockdep_assert_held(&ctx->mutex); + event = event->group_leader; perf_event_for_each_child(event, func); list_for_each_entry(sibling, &event->sibling_list, group_entry) perf_event_for_each_child(sibling, func); - mutex_unlock(&ctx->mutex); } static int perf_event_period(struct perf_event *event, u64 __user *arg) @@ -3796,25 +3928,24 @@ static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event); static int perf_event_set_filter(struct perf_event *event, void __user *arg); -static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) { - struct perf_event *event = file->private_data; void (*func)(struct perf_event *); u32 flags = arg; switch (cmd) { case PERF_EVENT_IOC_ENABLE: - func = perf_event_enable; + func = _perf_event_enable; break; case PERF_EVENT_IOC_DISABLE: - func = perf_event_disable; + func = _perf_event_disable; break; case PERF_EVENT_IOC_RESET: - func = perf_event_reset; + func = _perf_event_reset; break; case PERF_EVENT_IOC_REFRESH: - return perf_event_refresh(event, arg); + return _perf_event_refresh(event, arg); case PERF_EVENT_IOC_PERIOD: return perf_event_period(event, (u64 __user *)arg); @@ -3861,6 +3992,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return 0; } +static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct perf_event *event = file->private_data; + struct perf_event_context *ctx; + long ret; + + ctx = perf_event_ctx_lock(event); + ret = _perf_ioctl(event, cmd, arg); + perf_event_ctx_unlock(event, ctx); + + return ret; +} + #ifdef CONFIG_COMPAT static long perf_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -3883,11 +4027,15 @@ static long perf_compat_ioctl(struct file *file, unsigned int cmd, int perf_event_task_enable(void) { + struct perf_event_context *ctx; struct perf_event *event; mutex_lock(¤t->perf_event_mutex); - list_for_each_entry(event, ¤t->perf_event_list, owner_entry) - perf_event_for_each_child(event, perf_event_enable); + list_for_each_entry(event, ¤t->perf_event_list, owner_entry) { + ctx = perf_event_ctx_lock(event); + perf_event_for_each_child(event, _perf_event_enable); + perf_event_ctx_unlock(event, ctx); + } mutex_unlock(¤t->perf_event_mutex); return 0; @@ -3895,11 +4043,15 @@ int perf_event_task_enable(void) int perf_event_task_disable(void) { + struct perf_event_context *ctx; struct perf_event *event; mutex_lock(¤t->perf_event_mutex); - list_for_each_entry(event, ¤t->perf_event_list, owner_entry) - perf_event_for_each_child(event, perf_event_disable); + list_for_each_entry(event, ¤t->perf_event_list, owner_entry) { + ctx = perf_event_ctx_lock(event); + perf_event_for_each_child(event, _perf_event_disable); + perf_event_ctx_unlock(event, ctx); + } mutex_unlock(¤t->perf_event_mutex); return 0; @@ -5889,6 +6041,8 @@ end: rcu_read_unlock(); } +DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]); + int perf_swevent_get_recursion_context(void) { struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); @@ -5904,21 +6058,30 @@ inline void perf_swevent_put_recursion_context(int rctx) put_recursion_context(swhash->recursion, rctx); } -void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) +void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { struct perf_sample_data data; - int rctx; - preempt_disable_notrace(); - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) + if (WARN_ON_ONCE(!regs)) return; perf_sample_data_init(&data, addr, 0); - do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); +} + +void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) +{ + int rctx; + + preempt_disable_notrace(); + rctx = perf_swevent_get_recursion_context(); + if (unlikely(rctx < 0)) + goto fail; + + ___perf_sw_event(event_id, nr, regs, addr); perf_swevent_put_recursion_context(rctx); +fail: preempt_enable_notrace(); } @@ -6780,7 +6943,6 @@ skip_type: __perf_cpu_hrtimer_init(cpuctx, cpu); - INIT_LIST_HEAD(&cpuctx->rotation_list); cpuctx->unique_pmu = pmu; } @@ -6853,6 +7015,20 @@ void perf_pmu_unregister(struct pmu *pmu) } EXPORT_SYMBOL_GPL(perf_pmu_unregister); +static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) +{ + int ret; + + if (!try_module_get(pmu->module)) + return -ENODEV; + event->pmu = pmu; + ret = pmu->event_init(event); + if (ret) + module_put(pmu->module); + + return ret; +} + struct pmu *perf_init_event(struct perf_event *event) { struct pmu *pmu = NULL; @@ -6865,24 +7041,14 @@ struct pmu *perf_init_event(struct perf_event *event) pmu = idr_find(&pmu_idr, event->attr.type); rcu_read_unlock(); if (pmu) { - if (!try_module_get(pmu->module)) { - pmu = ERR_PTR(-ENODEV); - goto unlock; - } - event->pmu = pmu; - ret = pmu->event_init(event); + ret = perf_try_init_event(pmu, event); if (ret) pmu = ERR_PTR(ret); goto unlock; } list_for_each_entry_rcu(pmu, &pmus, entry) { - if (!try_module_get(pmu->module)) { - pmu = ERR_PTR(-ENODEV); - goto unlock; - } - event->pmu = pmu; - ret = pmu->event_init(event); + ret = perf_try_init_event(pmu, event); if (!ret) goto unlock; @@ -7246,6 +7412,15 @@ out: return ret; } +static void mutex_lock_double(struct mutex *a, struct mutex *b) +{ + if (b < a) + swap(a, b); + + mutex_lock(a); + mutex_lock_nested(b, SINGLE_DEPTH_NESTING); +} + /** * sys_perf_event_open - open a performance event, associate it to a task/cpu * @@ -7261,7 +7436,7 @@ SYSCALL_DEFINE5(perf_event_open, struct perf_event *group_leader = NULL, *output_event = NULL; struct perf_event *event, *sibling; struct perf_event_attr attr; - struct perf_event_context *ctx; + struct perf_event_context *ctx, *uninitialized_var(gctx); struct file *event_file = NULL; struct fd group = {NULL, 0}; struct task_struct *task = NULL; @@ -7459,43 +7634,68 @@ SYSCALL_DEFINE5(perf_event_open, } if (move_group) { - struct perf_event_context *gctx = group_leader->ctx; - - mutex_lock(&gctx->mutex); - perf_remove_from_context(group_leader, false); + gctx = group_leader->ctx; /* - * Removing from the context ends up with disabled - * event. What we want here is event in the initial - * startup state, ready to be add into new context. + * See perf_event_ctx_lock() for comments on the details + * of swizzling perf_event::ctx. */ - perf_event__state_init(group_leader); + mutex_lock_double(&gctx->mutex, &ctx->mutex); + + perf_remove_from_context(group_leader, false); + list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { perf_remove_from_context(sibling, false); - perf_event__state_init(sibling); put_ctx(gctx); } - mutex_unlock(&gctx->mutex); - put_ctx(gctx); + } else { + mutex_lock(&ctx->mutex); } WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); if (move_group) { + /* + * Wait for everybody to stop referencing the events through + * the old lists, before installing it on new lists. + */ synchronize_rcu(); - perf_install_in_context(ctx, group_leader, group_leader->cpu); - get_ctx(ctx); + + /* + * Install the group siblings before the group leader. + * + * Because a group leader will try and install the entire group + * (through the sibling list, which is still in-tact), we can + * end up with siblings installed in the wrong context. + * + * By installing siblings first we NO-OP because they're not + * reachable through the group lists. + */ list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { + perf_event__state_init(sibling); perf_install_in_context(ctx, sibling, sibling->cpu); get_ctx(ctx); } + + /* + * Removing from the context ends up with disabled + * event. What we want here is event in the initial + * startup state, ready to be add into new context. + */ + perf_event__state_init(group_leader); + perf_install_in_context(ctx, group_leader, group_leader->cpu); + get_ctx(ctx); } perf_install_in_context(ctx, event, event->cpu); perf_unpin_context(ctx); + + if (move_group) { + mutex_unlock(&gctx->mutex); + put_ctx(gctx); + } mutex_unlock(&ctx->mutex); put_online_cpus(); @@ -7603,7 +7803,11 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx; dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx; - mutex_lock(&src_ctx->mutex); + /* + * See perf_event_ctx_lock() for comments on the details + * of swizzling perf_event::ctx. + */ + mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex); list_for_each_entry_safe(event, tmp, &src_ctx->event_list, event_entry) { perf_remove_from_context(event, false); @@ -7611,11 +7815,36 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) put_ctx(src_ctx); list_add(&event->migrate_entry, &events); } - mutex_unlock(&src_ctx->mutex); + /* + * Wait for the events to quiesce before re-instating them. + */ synchronize_rcu(); - mutex_lock(&dst_ctx->mutex); + /* + * Re-instate events in 2 passes. + * + * Skip over group leaders and only install siblings on this first + * pass, siblings will not get enabled without a leader, however a + * leader will enable its siblings, even if those are still on the old + * context. + */ + list_for_each_entry_safe(event, tmp, &events, migrate_entry) { + if (event->group_leader == event) + continue; + + list_del(&event->migrate_entry); + if (event->state >= PERF_EVENT_STATE_OFF) + event->state = PERF_EVENT_STATE_INACTIVE; + account_event_cpu(event, dst_cpu); + perf_install_in_context(dst_ctx, event, dst_cpu); + get_ctx(dst_ctx); + } + + /* + * Once all the siblings are setup properly, install the group leaders + * to make it go. + */ list_for_each_entry_safe(event, tmp, &events, migrate_entry) { list_del(&event->migrate_entry); if (event->state >= PERF_EVENT_STATE_OFF) @@ -7625,6 +7854,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) get_ctx(dst_ctx); } mutex_unlock(&dst_ctx->mutex); + mutex_unlock(&src_ctx->mutex); } EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); @@ -7811,14 +8041,19 @@ static void perf_free_event(struct perf_event *event, put_event(parent); + raw_spin_lock_irq(&ctx->lock); perf_group_detach(event); list_del_event(event, ctx); + raw_spin_unlock_irq(&ctx->lock); free_event(event); } /* - * free an unexposed, unused context as created by inheritance by + * Free an unexposed, unused context as created by inheritance by * perf_event_init_task below, used by fork() in case of fail. + * + * Not all locks are strictly required, but take them anyway to be nice and + * help out with the lockdep assertions. */ void perf_event_free_task(struct task_struct *task) { @@ -8137,7 +8372,7 @@ static void __init perf_event_init_all_cpus(void) for_each_possible_cpu(cpu) { swhash = &per_cpu(swevent_htable, cpu); mutex_init(&swhash->hlist_mutex); - INIT_LIST_HEAD(&per_cpu(rotation_list, cpu)); + INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu)); } } @@ -8158,22 +8393,11 @@ static void perf_event_init_cpu(int cpu) } #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC -static void perf_pmu_rotate_stop(struct pmu *pmu) -{ - struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - - WARN_ON(!irqs_disabled()); - - list_del_init(&cpuctx->rotation_list); -} - static void __perf_event_exit_context(void *__info) { struct remove_event re = { .detach_group = true }; struct perf_event_context *ctx = __info; - perf_pmu_rotate_stop(ctx->pmu); - rcu_read_lock(); list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry) __perf_remove_from_context(&re); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 146a5792b1d2..eadb95ce7aac 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -13,12 +13,13 @@ #include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/circ_buf.h> +#include <linux/poll.h> #include "internal.h" static void perf_output_wakeup(struct perf_output_handle *handle) { - atomic_set(&handle->rb->poll, POLL_IN); + atomic_set(&handle->rb->poll, POLLIN); handle->event->pending_wakeup = 1; irq_work_queue(&handle->event->pending); diff --git a/kernel/futex.c b/kernel/futex.c index 63678b573d61..4eeb63de7e54 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2258,7 +2258,7 @@ static long futex_wait_restart(struct restart_block *restart) * if there are waiters then it will block, it does PI, etc. (Due to * races the kernel might see a 0 value of the futex too.) */ -static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, +static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to = NULL; @@ -2953,11 +2953,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, case FUTEX_WAKE_OP: return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); case FUTEX_LOCK_PI: - return futex_lock_pi(uaddr, flags, val, timeout, 0); + return futex_lock_pi(uaddr, flags, timeout, 0); case FUTEX_UNLOCK_PI: return futex_unlock_pi(uaddr, flags); case FUTEX_TRYLOCK_PI: - return futex_lock_pi(uaddr, flags, 0, timeout, 1); + return futex_lock_pi(uaddr, flags, NULL, 1); case FUTEX_WAIT_REQUEUE_PI: val3 = FUTEX_BITSET_MATCH_ANY; return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 8541bfdfd232..4ca8eb151975 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -1,5 +1,5 @@ -obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o +obj-y += mutex.o semaphore.o rwsem.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = -pg @@ -14,6 +14,7 @@ ifeq ($(CONFIG_PROC_FS),y) obj-$(CONFIG_LOCKDEP) += lockdep_proc.o endif obj-$(CONFIG_SMP) += spinlock.o +obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o obj-$(CONFIG_SMP) += lglock.o obj-$(CONFIG_PROVE_LOCKING) += spinlock.o obj-$(CONFIG_RT_MUTEXES) += rtmutex.o diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index 4d60986fcbee..d1fe2ba5bac9 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -108,20 +108,4 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) arch_mcs_spin_unlock_contended(&next->locked); } -/* - * Cancellable version of the MCS lock above. - * - * Intended for adaptive spinning of sleeping locks: - * mutex_lock()/rwsem_down_{read,write}() etc. - */ - -struct optimistic_spin_node { - struct optimistic_spin_node *next, *prev; - int locked; /* 1 if lock acquired */ - int cpu; /* encoded CPU # value */ -}; - -extern bool osq_lock(struct optimistic_spin_queue *lock); -extern void osq_unlock(struct optimistic_spin_queue *lock); - #endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 454195194d4a..94674e5919cb 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -81,7 +81,7 @@ __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count); * The mutex must later on be released by the same task that * acquired it. Recursive locking is not allowed. The task * may not exit without first unlocking the mutex. Also, kernel - * memory where the mutex resides mutex must not be freed with + * memory where the mutex resides must not be freed with * the mutex still locked. The mutex must first be initialized * (or statically defined) before it can be locked. memset()-ing * the mutex to 0 is not allowed. @@ -147,7 +147,7 @@ static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, } /* - * after acquiring lock with fastpath or when we lost out in contested + * After acquiring lock with fastpath or when we lost out in contested * slowpath, set ctx and wake up any waiters so they can recheck. * * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set, @@ -191,19 +191,32 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, spin_unlock_mutex(&lock->base.wait_lock, flags); } - -#ifdef CONFIG_MUTEX_SPIN_ON_OWNER /* - * In order to avoid a stampede of mutex spinners from acquiring the mutex - * more or less simultaneously, the spinners need to acquire a MCS lock - * first before spinning on the owner field. + * After acquiring lock in the slowpath set ctx and wake up any + * waiters so they can recheck. * + * Callers must hold the mutex wait_lock. */ +static __always_inline void +ww_mutex_set_context_slowpath(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx) +{ + struct mutex_waiter *cur; -/* - * Mutex spinning code migrated from kernel/sched/core.c - */ + ww_mutex_lock_acquired(lock, ctx); + lock->ctx = ctx; + + /* + * Give any possible sleeping processes the chance to wake up, + * so they can recheck if they have to back off. + */ + list_for_each_entry(cur, &lock->base.wait_list, list) { + debug_mutex_wake_waiter(&lock->base, cur); + wake_up_process(cur->task); + } +} +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER static inline bool owner_running(struct mutex *lock, struct task_struct *owner) { if (lock->owner != owner) @@ -307,6 +320,11 @@ static bool mutex_optimistic_spin(struct mutex *lock, if (!mutex_can_spin_on_owner(lock)) goto done; + /* + * In order to avoid a stampede of mutex spinners trying to + * acquire the mutex all at once, the spinners need to take a + * MCS (queued) lock first before spinning on the owner field. + */ if (!osq_lock(&lock->osq)) goto done; @@ -469,7 +487,7 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock) EXPORT_SYMBOL(ww_mutex_unlock); static inline int __sched -__mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) +__ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) { struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); @@ -557,7 +575,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } if (use_ww_ctx && ww_ctx->acquired > 0) { - ret = __mutex_lock_check_stamp(lock, ww_ctx); + ret = __ww_mutex_lock_check_stamp(lock, ww_ctx); if (ret) goto err; } @@ -569,6 +587,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, schedule_preempt_disabled(); spin_lock_mutex(&lock->wait_lock, flags); } + __set_task_state(task, TASK_RUNNING); + mutex_remove_waiter(lock, &waiter, current_thread_info()); /* set it to 0 if there are no waiters left: */ if (likely(list_empty(&lock->wait_list))) @@ -582,23 +602,7 @@ skip_wait: if (use_ww_ctx) { struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); - struct mutex_waiter *cur; - - /* - * This branch gets optimized out for the common case, - * and is only important for ww_mutex_lock. - */ - ww_mutex_lock_acquired(ww, ww_ctx); - ww->ctx = ww_ctx; - - /* - * Give any possible sleeping processes the chance to wake up, - * so they can recheck if they have to back off. - */ - list_for_each_entry(cur, &lock->wait_list, list) { - debug_mutex_wake_waiter(lock, cur); - wake_up_process(cur->task); - } + ww_mutex_set_context_slowpath(ww, ww_ctx); } spin_unlock_mutex(&lock->wait_lock, flags); diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/osq_lock.c index 9887a905a762..c112d00341b0 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/osq_lock.c @@ -1,8 +1,6 @@ #include <linux/percpu.h> #include <linux/sched.h> -#include "mcs_spinlock.h" - -#ifdef CONFIG_SMP +#include <linux/osq_lock.h> /* * An MCS like lock especially tailored for optimistic spinning for sleeping @@ -111,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock) * cmpxchg in an attempt to undo our queueing. */ - while (!smp_load_acquire(&node->locked)) { + while (!ACCESS_ONCE(node->locked)) { /* * If we need to reschedule bail... so we can block. */ @@ -203,6 +201,3 @@ void osq_unlock(struct optimistic_spin_queue *lock) if (next) ACCESS_ONCE(next->locked) = 1; } - -#endif - diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 7c98873a3077..3059bc2f022d 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1130,6 +1130,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, set_current_state(state); } + __set_current_state(TASK_RUNNING); return ret; } @@ -1188,10 +1189,9 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); if (likely(!ret)) + /* sleep on the mutex */ ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); - set_current_state(TASK_RUNNING); - if (unlikely(ret)) { remove_waiter(lock, &waiter); rt_mutex_handle_deadlock(ret, chwalk, &waiter); @@ -1626,10 +1626,9 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, set_current_state(TASK_INTERRUPTIBLE); + /* sleep on the mutex */ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); - set_current_state(TASK_RUNNING); - if (unlikely(ret)) remove_waiter(lock, waiter); diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 2c93571162cb..2555ae15ec14 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -154,7 +154,7 @@ void __sched __down_read(struct rw_semaphore *sem) set_task_state(tsk, TASK_UNINTERRUPTIBLE); } - tsk->state = TASK_RUNNING; + __set_task_state(tsk, TASK_RUNNING); out: ; } diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 7628c3fc37ca..2f7cc4076f50 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -242,8 +242,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) schedule(); } - tsk->state = TASK_RUNNING; - + __set_task_state(tsk, TASK_RUNNING); return sem; } EXPORT_SYMBOL(rwsem_down_read_failed); diff --git a/kernel/notifier.c b/kernel/notifier.c index 4803da6eab62..ae9fc7cc360e 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -402,6 +402,7 @@ int raw_notifier_call_chain(struct raw_notifier_head *nh, } EXPORT_SYMBOL_GPL(raw_notifier_call_chain); +#ifdef CONFIG_SRCU /* * SRCU notifier chain routines. Registration and unregistration * use a mutex, and call_chain is synchronized by SRCU (no locks). @@ -528,6 +529,8 @@ void srcu_init_notifier_head(struct srcu_notifier_head *nh) } EXPORT_SYMBOL_GPL(srcu_init_notifier_head); +#endif /* CONFIG_SRCU */ + static ATOMIC_NOTIFIER_HEAD(die_chain); int notrace notify_die(enum die_val val, const char *str, diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 48b28d387c7f..7e01f78f0417 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -251,6 +251,7 @@ config APM_EMULATION config PM_OPP bool + select SRCU ---help--- SOCs have a standard set of tuples consisting of frequency and voltage pairs that the device will support per voltage domain. This diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index e6fae503d1bc..50a808424b06 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile @@ -1,4 +1,5 @@ -obj-y += update.o srcu.o +obj-y += update.o +obj-$(CONFIG_SRCU) += srcu.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += tree.o obj-$(CONFIG_PREEMPT_RCU) += tree.o diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 07bb02eda844..80adef7d4c3d 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -137,4 +137,10 @@ int rcu_jiffies_till_stall_check(void); void rcu_early_boot_tests(void); +/* + * This function really isn't for public consumption, but RCU is special in + * that context switches can allow the state machine to make progress. + */ +extern void resched_cpu(int cpu); + #endif /* __LINUX_RCU_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 4d559baf06e0..30d42aa55d83 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -244,7 +244,8 @@ struct rcu_torture_ops { int (*readlock)(void); void (*read_delay)(struct torture_random_state *rrsp); void (*readunlock)(int idx); - int (*completed)(void); + unsigned long (*started)(void); + unsigned long (*completed)(void); void (*deferred_free)(struct rcu_torture *p); void (*sync)(void); void (*exp_sync)(void); @@ -296,11 +297,6 @@ static void rcu_torture_read_unlock(int idx) __releases(RCU) rcu_read_unlock(); } -static int rcu_torture_completed(void) -{ - return rcu_batches_completed(); -} - /* * Update callback in the pipe. This should be invoked after a grace period. */ @@ -356,7 +352,7 @@ rcu_torture_cb(struct rcu_head *p) cur_ops->deferred_free(rp); } -static int rcu_no_completed(void) +static unsigned long rcu_no_completed(void) { return 0; } @@ -377,7 +373,8 @@ static struct rcu_torture_ops rcu_ops = { .readlock = rcu_torture_read_lock, .read_delay = rcu_read_delay, .readunlock = rcu_torture_read_unlock, - .completed = rcu_torture_completed, + .started = rcu_batches_started, + .completed = rcu_batches_completed, .deferred_free = rcu_torture_deferred_free, .sync = synchronize_rcu, .exp_sync = synchronize_rcu_expedited, @@ -407,11 +404,6 @@ static void rcu_bh_torture_read_unlock(int idx) __releases(RCU_BH) rcu_read_unlock_bh(); } -static int rcu_bh_torture_completed(void) -{ - return rcu_batches_completed_bh(); -} - static void rcu_bh_torture_deferred_free(struct rcu_torture *p) { call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); @@ -423,7 +415,8 @@ static struct rcu_torture_ops rcu_bh_ops = { .readlock = rcu_bh_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = rcu_bh_torture_read_unlock, - .completed = rcu_bh_torture_completed, + .started = rcu_batches_started_bh, + .completed = rcu_batches_completed_bh, .deferred_free = rcu_bh_torture_deferred_free, .sync = synchronize_rcu_bh, .exp_sync = synchronize_rcu_bh_expedited, @@ -466,6 +459,7 @@ static struct rcu_torture_ops rcu_busted_ops = { .readlock = rcu_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = rcu_torture_read_unlock, + .started = rcu_no_completed, .completed = rcu_no_completed, .deferred_free = rcu_busted_torture_deferred_free, .sync = synchronize_rcu_busted, @@ -510,7 +504,7 @@ static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) srcu_read_unlock(&srcu_ctl, idx); } -static int srcu_torture_completed(void) +static unsigned long srcu_torture_completed(void) { return srcu_batches_completed(&srcu_ctl); } @@ -564,6 +558,7 @@ static struct rcu_torture_ops srcu_ops = { .readlock = srcu_torture_read_lock, .read_delay = srcu_read_delay, .readunlock = srcu_torture_read_unlock, + .started = NULL, .completed = srcu_torture_completed, .deferred_free = srcu_torture_deferred_free, .sync = srcu_torture_synchronize, @@ -600,7 +595,8 @@ static struct rcu_torture_ops sched_ops = { .readlock = sched_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = sched_torture_read_unlock, - .completed = rcu_no_completed, + .started = rcu_batches_started_sched, + .completed = rcu_batches_completed_sched, .deferred_free = rcu_sched_torture_deferred_free, .sync = synchronize_sched, .exp_sync = synchronize_sched_expedited, @@ -638,6 +634,7 @@ static struct rcu_torture_ops tasks_ops = { .readlock = tasks_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = tasks_torture_read_unlock, + .started = rcu_no_completed, .completed = rcu_no_completed, .deferred_free = rcu_tasks_torture_deferred_free, .sync = synchronize_rcu_tasks, @@ -1015,8 +1012,8 @@ static void rcutorture_trace_dump(void) static void rcu_torture_timer(unsigned long unused) { int idx; - int completed; - int completed_end; + unsigned long started; + unsigned long completed; static DEFINE_TORTURE_RANDOM(rand); static DEFINE_SPINLOCK(rand_lock); struct rcu_torture *p; @@ -1024,7 +1021,10 @@ static void rcu_torture_timer(unsigned long unused) unsigned long long ts; idx = cur_ops->readlock(); - completed = cur_ops->completed(); + if (cur_ops->started) + started = cur_ops->started(); + else + started = cur_ops->completed(); ts = rcu_trace_clock_local(); p = rcu_dereference_check(rcu_torture_current, rcu_read_lock_bh_held() || @@ -1047,14 +1047,16 @@ static void rcu_torture_timer(unsigned long unused) /* Should not happen, but... */ pipe_count = RCU_TORTURE_PIPE_LEN; } - completed_end = cur_ops->completed(); + completed = cur_ops->completed(); if (pipe_count > 1) { do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts, - completed, completed_end); + started, completed); rcutorture_trace_dump(); } __this_cpu_inc(rcu_torture_count[pipe_count]); - completed = completed_end - completed; + completed = completed - started; + if (cur_ops->started) + completed++; if (completed > RCU_TORTURE_PIPE_LEN) { /* Should not happen, but... */ completed = RCU_TORTURE_PIPE_LEN; @@ -1073,8 +1075,8 @@ static void rcu_torture_timer(unsigned long unused) static int rcu_torture_reader(void *arg) { - int completed; - int completed_end; + unsigned long started; + unsigned long completed; int idx; DEFINE_TORTURE_RANDOM(rand); struct rcu_torture *p; @@ -1093,7 +1095,10 @@ rcu_torture_reader(void *arg) mod_timer(&t, jiffies + 1); } idx = cur_ops->readlock(); - completed = cur_ops->completed(); + if (cur_ops->started) + started = cur_ops->started(); + else + started = cur_ops->completed(); ts = rcu_trace_clock_local(); p = rcu_dereference_check(rcu_torture_current, rcu_read_lock_bh_held() || @@ -1114,14 +1119,16 @@ rcu_torture_reader(void *arg) /* Should not happen, but... */ pipe_count = RCU_TORTURE_PIPE_LEN; } - completed_end = cur_ops->completed(); + completed = cur_ops->completed(); if (pipe_count > 1) { do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, - ts, completed, completed_end); + ts, started, completed); rcutorture_trace_dump(); } __this_cpu_inc(rcu_torture_count[pipe_count]); - completed = completed_end - completed; + completed = completed - started; + if (cur_ops->started) + completed++; if (completed > RCU_TORTURE_PIPE_LEN) { /* Should not happen, but... */ completed = RCU_TORTURE_PIPE_LEN; @@ -1420,6 +1427,9 @@ static int rcu_torture_barrier(void *arg) cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */ if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) { n_rcu_torture_barrier_error++; + pr_err("barrier_cbs_invoked = %d, n_barrier_cbs = %d\n", + atomic_read(&barrier_cbs_invoked), + n_barrier_cbs); WARN_ON_ONCE(1); } n_barrier_successes++; diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index e037f3eb2f7b..445bf8ffe3fb 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -546,7 +546,7 @@ EXPORT_SYMBOL_GPL(srcu_barrier); * Report the number of batches, correlated with, but not necessarily * precisely the same as, the number of grace periods that have elapsed. */ -long srcu_batches_completed(struct srcu_struct *sp) +unsigned long srcu_batches_completed(struct srcu_struct *sp) { return sp->completed; } diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 0db5649f8817..cc9ceca7bde1 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -47,54 +47,14 @@ static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_ctrlblk *rcp); -static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; - #include "tiny_plugin.h" -/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */ -static void rcu_idle_enter_common(long long newval) -{ - if (newval) { - RCU_TRACE(trace_rcu_dyntick(TPS("--="), - rcu_dynticks_nesting, newval)); - rcu_dynticks_nesting = newval; - return; - } - RCU_TRACE(trace_rcu_dyntick(TPS("Start"), - rcu_dynticks_nesting, newval)); - if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - - RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"), - rcu_dynticks_nesting, newval)); - ftrace_dump(DUMP_ALL); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! */ - } - rcu_sched_qs(); /* implies rcu_bh_inc() */ - barrier(); - rcu_dynticks_nesting = newval; -} - /* * Enter idle, which is an extended quiescent state if we have fully - * entered that mode (i.e., if the new value of dynticks_nesting is zero). + * entered that mode. */ void rcu_idle_enter(void) { - unsigned long flags; - long long newval; - - local_irq_save(flags); - WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); - if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == - DYNTICK_TASK_NEST_VALUE) - newval = 0; - else - newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE; - rcu_idle_enter_common(newval); - local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_enter); @@ -103,55 +63,14 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter); */ void rcu_irq_exit(void) { - unsigned long flags; - long long newval; - - local_irq_save(flags); - newval = rcu_dynticks_nesting - 1; - WARN_ON_ONCE(newval < 0); - rcu_idle_enter_common(newval); - local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_irq_exit); -/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */ -static void rcu_idle_exit_common(long long oldval) -{ - if (oldval) { - RCU_TRACE(trace_rcu_dyntick(TPS("++="), - oldval, rcu_dynticks_nesting)); - return; - } - RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting)); - if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - - RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"), - oldval, rcu_dynticks_nesting)); - ftrace_dump(DUMP_ALL); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! */ - } -} - /* * Exit idle, so that we are no longer in an extended quiescent state. */ void rcu_idle_exit(void) { - unsigned long flags; - long long oldval; - - local_irq_save(flags); - oldval = rcu_dynticks_nesting; - WARN_ON_ONCE(rcu_dynticks_nesting < 0); - if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) - rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE; - else - rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; - rcu_idle_exit_common(oldval); - local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_exit); @@ -160,15 +79,6 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit); */ void rcu_irq_enter(void) { - unsigned long flags; - long long oldval; - - local_irq_save(flags); - oldval = rcu_dynticks_nesting; - rcu_dynticks_nesting++; - WARN_ON_ONCE(rcu_dynticks_nesting == 0); - rcu_idle_exit_common(oldval); - local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_irq_enter); @@ -179,23 +89,13 @@ EXPORT_SYMBOL_GPL(rcu_irq_enter); */ bool notrace __rcu_is_watching(void) { - return rcu_dynticks_nesting; + return true; } EXPORT_SYMBOL(__rcu_is_watching); #endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ /* - * Test whether the current CPU was interrupted from idle. Nested - * interrupts don't count, we must be running at the first interrupt - * level. - */ -static int rcu_is_cpu_rrupt_from_idle(void) -{ - return rcu_dynticks_nesting <= 1; -} - -/* * Helper function for rcu_sched_qs() and rcu_bh_qs(). * Also irqs are disabled to avoid confusion due to interrupt handlers * invoking call_rcu(). @@ -250,7 +150,7 @@ void rcu_bh_qs(void) void rcu_check_callbacks(int user) { RCU_TRACE(check_cpu_stalls()); - if (user || rcu_is_cpu_rrupt_from_idle()) + if (user) rcu_sched_qs(); else if (!in_softirq()) rcu_bh_qs(); @@ -357,6 +257,11 @@ static void __call_rcu(struct rcu_head *head, rcp->curtail = &head->next; RCU_TRACE(rcp->qlen++); local_irq_restore(flags); + + if (unlikely(is_idle_task(current))) { + /* force scheduling for rcu_sched_qs() */ + resched_cpu(0); + } } /* @@ -383,6 +288,8 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); void __init rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); + RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk)); + RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk)); rcu_early_boot_tests(); } diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index 858c56569127..f94e209a10d6 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h @@ -145,17 +145,16 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp) rcp->ticks_this_gp++; j = jiffies; js = ACCESS_ONCE(rcp->jiffies_stall); - if (*rcp->curtail && ULONG_CMP_GE(j, js)) { + if (rcp->rcucblist && ULONG_CMP_GE(j, js)) { pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n", - rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting, + rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE, jiffies - rcp->gp_start, rcp->qlen); dump_stack(); - } - if (*rcp->curtail && ULONG_CMP_GE(j, js)) ACCESS_ONCE(rcp->jiffies_stall) = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; - else if (ULONG_CMP_GE(j, js)) + } else if (ULONG_CMP_GE(j, js)) { ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check(); + } } static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 7680fc275036..48d640ca1a05 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -156,6 +156,10 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) static void invoke_rcu_core(void); static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); +/* rcuc/rcub kthread realtime priority */ +static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; +module_param(kthread_prio, int, 0644); + /* * Track the rcutorture test sequence number and the update version * number within a given test. The rcutorture_testseq is incremented @@ -215,6 +219,9 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { #endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ }; +DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr); +EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr); + /* * Let the RCU core know that this CPU has gone through the scheduler, * which is a quiescent state. This is called when the need for a @@ -284,6 +291,22 @@ void rcu_note_context_switch(void) } EXPORT_SYMBOL_GPL(rcu_note_context_switch); +/* + * Register a quiesecent state for all RCU flavors. If there is an + * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight + * dyntick-idle quiescent state visible to other CPUs (but only for those + * RCU flavors in desparate need of a quiescent state, which will normally + * be none of them). Either way, do a lightweight quiescent state for + * all RCU flavors. + */ +void rcu_all_qs(void) +{ + if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) + rcu_momentary_dyntick_idle(); + this_cpu_inc(rcu_qs_ctr); +} +EXPORT_SYMBOL_GPL(rcu_all_qs); + static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ static long qhimark = 10000; /* If this many pending, ignore blimit. */ static long qlowmark = 100; /* Once only this many pending, use blimit. */ @@ -315,18 +338,54 @@ static void force_quiescent_state(struct rcu_state *rsp); static int rcu_pending(void); /* - * Return the number of RCU-sched batches processed thus far for debug & stats. + * Return the number of RCU batches started thus far for debug & stats. + */ +unsigned long rcu_batches_started(void) +{ + return rcu_state_p->gpnum; +} +EXPORT_SYMBOL_GPL(rcu_batches_started); + +/* + * Return the number of RCU-sched batches started thus far for debug & stats. + */ +unsigned long rcu_batches_started_sched(void) +{ + return rcu_sched_state.gpnum; +} +EXPORT_SYMBOL_GPL(rcu_batches_started_sched); + +/* + * Return the number of RCU BH batches started thus far for debug & stats. */ -long rcu_batches_completed_sched(void) +unsigned long rcu_batches_started_bh(void) +{ + return rcu_bh_state.gpnum; +} +EXPORT_SYMBOL_GPL(rcu_batches_started_bh); + +/* + * Return the number of RCU batches completed thus far for debug & stats. + */ +unsigned long rcu_batches_completed(void) +{ + return rcu_state_p->completed; +} +EXPORT_SYMBOL_GPL(rcu_batches_completed); + +/* + * Return the number of RCU-sched batches completed thus far for debug & stats. + */ +unsigned long rcu_batches_completed_sched(void) { return rcu_sched_state.completed; } EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); /* - * Return the number of RCU BH batches processed thus far for debug & stats. + * Return the number of RCU BH batches completed thus far for debug & stats. */ -long rcu_batches_completed_bh(void) +unsigned long rcu_batches_completed_bh(void) { return rcu_bh_state.completed; } @@ -759,39 +818,71 @@ void rcu_irq_enter(void) /** * rcu_nmi_enter - inform RCU of entry to NMI context * - * If the CPU was idle with dynamic ticks active, and there is no - * irq handler running, this updates rdtp->dynticks_nmi to let the - * RCU grace-period handling know that the CPU is active. + * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and + * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know + * that the CPU is active. This implementation permits nested NMIs, as + * long as the nesting level does not overflow an int. (You will probably + * run out of stack space first.) */ void rcu_nmi_enter(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + int incby = 2; - if (rdtp->dynticks_nmi_nesting == 0 && - (atomic_read(&rdtp->dynticks) & 0x1)) - return; - rdtp->dynticks_nmi_nesting++; - smp_mb__before_atomic(); /* Force delay from prior write. */ - atomic_inc(&rdtp->dynticks); - /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ - smp_mb__after_atomic(); /* See above. */ - WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); + /* Complain about underflow. */ + WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); + + /* + * If idle from RCU viewpoint, atomically increment ->dynticks + * to mark non-idle and increment ->dynticks_nmi_nesting by one. + * Otherwise, increment ->dynticks_nmi_nesting by two. This means + * if ->dynticks_nmi_nesting is equal to one, we are guaranteed + * to be in the outermost NMI handler that interrupted an RCU-idle + * period (observation due to Andy Lutomirski). + */ + if (!(atomic_read(&rdtp->dynticks) & 0x1)) { + smp_mb__before_atomic(); /* Force delay from prior write. */ + atomic_inc(&rdtp->dynticks); + /* atomic_inc() before later RCU read-side crit sects */ + smp_mb__after_atomic(); /* See above. */ + WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); + incby = 1; + } + rdtp->dynticks_nmi_nesting += incby; + barrier(); } /** * rcu_nmi_exit - inform RCU of exit from NMI context * - * If the CPU was idle with dynamic ticks active, and there is no - * irq handler running, this updates rdtp->dynticks_nmi to let the - * RCU grace-period handling know that the CPU is no longer active. + * If we are returning from the outermost NMI handler that interrupted an + * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting + * to let the RCU grace-period handling know that the CPU is back to + * being RCU-idle. */ void rcu_nmi_exit(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - if (rdtp->dynticks_nmi_nesting == 0 || - --rdtp->dynticks_nmi_nesting != 0) + /* + * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. + * (We are exiting an NMI handler, so RCU better be paying attention + * to us!) + */ + WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0); + WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); + + /* + * If the nesting level is not 1, the CPU wasn't RCU-idle, so + * leave it in non-RCU-idle state. + */ + if (rdtp->dynticks_nmi_nesting != 1) { + rdtp->dynticks_nmi_nesting -= 2; return; + } + + /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ + rdtp->dynticks_nmi_nesting = 0; /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ smp_mb__before_atomic(); /* See above. */ atomic_inc(&rdtp->dynticks); @@ -898,17 +989,14 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp, trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); return 1; } else { + if (ULONG_CMP_LT(ACCESS_ONCE(rdp->gpnum) + ULONG_MAX / 4, + rdp->mynode->gpnum)) + ACCESS_ONCE(rdp->gpwrap) = true; return 0; } } /* - * This function really isn't for public consumption, but RCU is special in - * that context switches can allow the state machine to make progress. - */ -extern void resched_cpu(int cpu); - -/* * Return true if the specified CPU has passed through a quiescent * state by virtue of being in or having passed through an dynticks * idle state since the last call to dyntick_save_progress_counter() @@ -1011,6 +1099,22 @@ static void record_gp_stall_check_time(struct rcu_state *rsp) j1 = rcu_jiffies_till_stall_check(); ACCESS_ONCE(rsp->jiffies_stall) = j + j1; rsp->jiffies_resched = j + j1 / 2; + rsp->n_force_qs_gpstart = ACCESS_ONCE(rsp->n_force_qs); +} + +/* + * Complain about starvation of grace-period kthread. + */ +static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp) +{ + unsigned long gpa; + unsigned long j; + + j = jiffies; + gpa = ACCESS_ONCE(rsp->gp_activity); + if (j - gpa > 2 * HZ) + pr_err("%s kthread starved for %ld jiffies!\n", + rsp->name, j - gpa); } /* @@ -1033,11 +1137,13 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp) } } -static void print_other_cpu_stall(struct rcu_state *rsp) +static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) { int cpu; long delta; unsigned long flags; + unsigned long gpa; + unsigned long j; int ndetected = 0; struct rcu_node *rnp = rcu_get_root(rsp); long totqlen = 0; @@ -1075,30 +1181,34 @@ static void print_other_cpu_stall(struct rcu_state *rsp) raw_spin_unlock_irqrestore(&rnp->lock, flags); } - /* - * Now rat on any tasks that got kicked up to the root rcu_node - * due to CPU offlining. - */ - rnp = rcu_get_root(rsp); - raw_spin_lock_irqsave(&rnp->lock, flags); - ndetected += rcu_print_task_stall(rnp); - raw_spin_unlock_irqrestore(&rnp->lock, flags); - print_cpu_stall_info_end(); for_each_possible_cpu(cpu) totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", smp_processor_id(), (long)(jiffies - rsp->gp_start), (long)rsp->gpnum, (long)rsp->completed, totqlen); - if (ndetected == 0) - pr_err("INFO: Stall ended before state dump start\n"); - else + if (ndetected) { rcu_dump_cpu_stacks(rsp); + } else { + if (ACCESS_ONCE(rsp->gpnum) != gpnum || + ACCESS_ONCE(rsp->completed) == gpnum) { + pr_err("INFO: Stall ended before state dump start\n"); + } else { + j = jiffies; + gpa = ACCESS_ONCE(rsp->gp_activity); + pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld\n", + rsp->name, j - gpa, j, gpa, + jiffies_till_next_fqs); + /* In this case, the current CPU might be at fault. */ + sched_show_task(current); + } + } /* Complain about tasks blocking the grace period. */ - rcu_print_detail_task_stall(rsp); + rcu_check_gp_kthread_starvation(rsp); + force_quiescent_state(rsp); /* Kick them all. */ } @@ -1123,6 +1233,9 @@ static void print_cpu_stall(struct rcu_state *rsp) pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n", jiffies - rsp->gp_start, (long)rsp->gpnum, (long)rsp->completed, totqlen); + + rcu_check_gp_kthread_starvation(rsp); + rcu_dump_cpu_stacks(rsp); raw_spin_lock_irqsave(&rnp->lock, flags); @@ -1193,7 +1306,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { /* They had a few time units to dump stack, so complain. */ - print_other_cpu_stall(rsp); + print_other_cpu_stall(rsp, gpnum); } } @@ -1530,7 +1643,8 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, bool ret; /* Handle the ends of any preceding grace periods first. */ - if (rdp->completed == rnp->completed) { + if (rdp->completed == rnp->completed && + !unlikely(ACCESS_ONCE(rdp->gpwrap))) { /* No grace period end, so just accelerate recent callbacks. */ ret = rcu_accelerate_cbs(rsp, rnp, rdp); @@ -1545,7 +1659,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend")); } - if (rdp->gpnum != rnp->gpnum) { + if (rdp->gpnum != rnp->gpnum || unlikely(ACCESS_ONCE(rdp->gpwrap))) { /* * If the current grace period is waiting for this CPU, * set up to detect a quiescent state, otherwise don't @@ -1554,8 +1668,10 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, rdp->gpnum = rnp->gpnum; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); rdp->passed_quiesce = 0; + rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); zero_cpu_stall_ticks(rdp); + ACCESS_ONCE(rdp->gpwrap) = false; } return ret; } @@ -1569,7 +1685,8 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) local_irq_save(flags); rnp = rdp->mynode; if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) && - rdp->completed == ACCESS_ONCE(rnp->completed)) || /* w/out lock. */ + rdp->completed == ACCESS_ONCE(rnp->completed) && + !unlikely(ACCESS_ONCE(rdp->gpwrap))) || /* w/out lock. */ !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ local_irq_restore(flags); return; @@ -1589,6 +1706,7 @@ static int rcu_gp_init(struct rcu_state *rsp) struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(rsp); + ACCESS_ONCE(rsp->gp_activity) = jiffies; rcu_bind_gp_kthread(); raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); @@ -1649,6 +1767,7 @@ static int rcu_gp_init(struct rcu_state *rsp) rnp->grphi, rnp->qsmask); raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); + ACCESS_ONCE(rsp->gp_activity) = jiffies; } mutex_unlock(&rsp->onoff_mutex); @@ -1665,6 +1784,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) unsigned long maxj; struct rcu_node *rnp = rcu_get_root(rsp); + ACCESS_ONCE(rsp->gp_activity) = jiffies; rsp->n_force_qs++; if (fqs_state == RCU_SAVE_DYNTICK) { /* Collect dyntick-idle snapshots. */ @@ -1703,6 +1823,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(rsp); + ACCESS_ONCE(rsp->gp_activity) = jiffies; raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); gp_duration = jiffies - rsp->gp_start; @@ -1739,6 +1860,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) nocb += rcu_future_gp_cleanup(rsp, rnp); raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); + ACCESS_ONCE(rsp->gp_activity) = jiffies; } rnp = rcu_get_root(rsp); raw_spin_lock_irq(&rnp->lock); @@ -1788,6 +1910,7 @@ static int __noreturn rcu_gp_kthread(void *arg) if (rcu_gp_init(rsp)) break; cond_resched_rcu_qs(); + ACCESS_ONCE(rsp->gp_activity) = jiffies; WARN_ON(signal_pending(current)); trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), @@ -1831,9 +1954,11 @@ static int __noreturn rcu_gp_kthread(void *arg) ACCESS_ONCE(rsp->gpnum), TPS("fqsend")); cond_resched_rcu_qs(); + ACCESS_ONCE(rsp->gp_activity) = jiffies; } else { /* Deal with stray signal. */ cond_resched_rcu_qs(); + ACCESS_ONCE(rsp->gp_activity) = jiffies; WARN_ON(signal_pending(current)); trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), @@ -2010,8 +2135,10 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum || - rnp->completed == rnp->gpnum) { + if ((rdp->passed_quiesce == 0 && + rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) || + rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum || + rdp->gpwrap) { /* * The grace period in which this quiescent state was @@ -2020,6 +2147,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) * within the current grace period. */ rdp->passed_quiesce = 0; /* need qs for new gp. */ + rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } @@ -2064,7 +2192,8 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) * Was there a quiescent state since the beginning of the grace * period? If no, then exit and wait for the next call. */ - if (!rdp->passed_quiesce) + if (!rdp->passed_quiesce && + rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) return; /* @@ -2195,6 +2324,46 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) } /* + * All CPUs for the specified rcu_node structure have gone offline, + * and all tasks that were preempted within an RCU read-side critical + * section while running on one of those CPUs have since exited their RCU + * read-side critical section. Some other CPU is reporting this fact with + * the specified rcu_node structure's ->lock held and interrupts disabled. + * This function therefore goes up the tree of rcu_node structures, + * clearing the corresponding bits in the ->qsmaskinit fields. Note that + * the leaf rcu_node structure's ->qsmaskinit field has already been + * updated + * + * This function does check that the specified rcu_node structure has + * all CPUs offline and no blocked tasks, so it is OK to invoke it + * prematurely. That said, invoking it after the fact will cost you + * a needless lock acquisition. So once it has done its work, don't + * invoke it again. + */ +static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) +{ + long mask; + struct rcu_node *rnp = rnp_leaf; + + if (rnp->qsmaskinit || rcu_preempt_has_tasks(rnp)) + return; + for (;;) { + mask = rnp->grpmask; + rnp = rnp->parent; + if (!rnp) + break; + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + smp_mb__after_unlock_lock(); /* GP memory ordering. */ + rnp->qsmaskinit &= ~mask; + if (rnp->qsmaskinit) { + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + return; + } + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + } +} + +/* * The CPU has been completely removed, and some other CPU is reporting * this fact from process context. Do the remainder of the cleanup, * including orphaning the outgoing CPU's RCU callbacks, and also @@ -2204,8 +2373,6 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) { unsigned long flags; - unsigned long mask; - int need_report = 0; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ @@ -2219,40 +2386,15 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); rcu_adopt_orphan_cbs(rsp, flags); + raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags); - /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ - mask = rdp->grpmask; /* rnp->grplo is constant. */ - do { - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ - smp_mb__after_unlock_lock(); - rnp->qsmaskinit &= ~mask; - if (rnp->qsmaskinit != 0) { - if (rnp != rdp->mynode) - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ - break; - } - if (rnp == rdp->mynode) - need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); - else - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ - mask = rnp->grpmask; - rnp = rnp->parent; - } while (rnp != NULL); - - /* - * We still hold the leaf rcu_node structure lock here, and - * irqs are still disabled. The reason for this subterfuge is - * because invoking rcu_report_unblock_qs_rnp() with ->orphan_lock - * held leads to deadlock. - */ - raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */ - rnp = rdp->mynode; - if (need_report & RCU_OFL_TASKS_NORM_GP) - rcu_report_unblock_qs_rnp(rnp, flags); - else - raw_spin_unlock_irqrestore(&rnp->lock, flags); - if (need_report & RCU_OFL_TASKS_EXP_GP) - rcu_report_exp_rnp(rsp, rnp, true); + /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ + raw_spin_lock_irqsave(&rnp->lock, flags); + smp_mb__after_unlock_lock(); /* Enforce GP memory-order guarantee. */ + rnp->qsmaskinit &= ~rdp->grpmask; + if (rnp->qsmaskinit == 0 && !rcu_preempt_has_tasks(rnp)) + rcu_cleanup_dead_rnp(rnp); + rcu_report_qs_rnp(rdp->grpmask, rsp, rnp, flags); /* Rlses rnp->lock. */ WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", cpu, rdp->qlen, rdp->nxtlist); @@ -2268,6 +2410,10 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) { } +static void __maybe_unused rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) +{ +} + static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) { } @@ -2464,12 +2610,6 @@ static void force_qs_rnp(struct rcu_state *rsp, } raw_spin_unlock_irqrestore(&rnp->lock, flags); } - rnp = rcu_get_root(rsp); - if (rnp->qsmask == 0) { - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); - rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ - } } /* @@ -2569,7 +2709,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) * Schedule RCU callback invocation. If the specified type of RCU * does not support RCU priority boosting, just do a direct call, * otherwise wake up the per-CPU kernel kthread. Note that because we - * are running on the current CPU with interrupts disabled, the + * are running on the current CPU with softirqs disabled, the * rcu_cpu_kthread_task cannot disappear out from under us. */ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) @@ -3109,9 +3249,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) /* Is the RCU core waiting for a quiescent state from this CPU? */ if (rcu_scheduler_fully_active && - rdp->qs_pending && !rdp->passed_quiesce) { + rdp->qs_pending && !rdp->passed_quiesce && + rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) { rdp->n_rp_qs_pending++; - } else if (rdp->qs_pending && rdp->passed_quiesce) { + } else if (rdp->qs_pending && + (rdp->passed_quiesce || + rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) { rdp->n_rp_report_qs++; return 1; } @@ -3135,7 +3278,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) } /* Has a new RCU grace period started? */ - if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */ + if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum || + unlikely(ACCESS_ONCE(rdp->gpwrap))) { /* outside lock */ rdp->n_rp_gp_started++; return 1; } @@ -3318,6 +3462,7 @@ static void _rcu_barrier(struct rcu_state *rsp) } else { _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, rsp->n_barrier_done); + smp_mb__before_atomic(); atomic_inc(&rsp->barrier_cpu_count); __call_rcu(&rdp->barrier_head, rcu_barrier_callback, rsp, cpu, 0); @@ -3385,9 +3530,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) /* Set up local state, ensuring consistent view of global state. */ raw_spin_lock_irqsave(&rnp->lock, flags); rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); - init_callback_list(rdp); - rdp->qlen_lazy = 0; - ACCESS_ONCE(rdp->qlen) = 0; rdp->dynticks = &per_cpu(rcu_dynticks, cpu); WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); @@ -3444,6 +3586,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->gpnum = rnp->completed; rdp->completed = rnp->completed; rdp->passed_quiesce = 0; + rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); rdp->qs_pending = 0; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); } @@ -3535,17 +3678,35 @@ static int rcu_pm_notify(struct notifier_block *self, static int __init rcu_spawn_gp_kthread(void) { unsigned long flags; + int kthread_prio_in = kthread_prio; struct rcu_node *rnp; struct rcu_state *rsp; + struct sched_param sp; struct task_struct *t; + /* Force priority into range. */ + if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1) + kthread_prio = 1; + else if (kthread_prio < 0) + kthread_prio = 0; + else if (kthread_prio > 99) + kthread_prio = 99; + if (kthread_prio != kthread_prio_in) + pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n", + kthread_prio, kthread_prio_in); + rcu_scheduler_fully_active = 1; for_each_rcu_flavor(rsp) { - t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name); + t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name); BUG_ON(IS_ERR(t)); rnp = rcu_get_root(rsp); raw_spin_lock_irqsave(&rnp->lock, flags); rsp->gp_kthread = t; + if (kthread_prio) { + sp.sched_priority = kthread_prio; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + } + wake_up_process(t); raw_spin_unlock_irqrestore(&rnp->lock, flags); } rcu_spawn_nocb_kthreads(); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 8e7b1843896e..119de399eb2f 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -27,7 +27,6 @@ #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/seqlock.h> -#include <linux/irq_work.h> /* * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and @@ -172,11 +171,6 @@ struct rcu_node { /* queued on this rcu_node structure that */ /* are blocking the current grace period, */ /* there can be no such task. */ - struct completion boost_completion; - /* Used to ensure that the rt_mutex used */ - /* to carry out the boosting is fully */ - /* released with no future boostee accesses */ - /* before that rt_mutex is re-initialized. */ struct rt_mutex boost_mtx; /* Used only for the priority-boosting */ /* side effect, not as a lock. */ @@ -257,9 +251,12 @@ struct rcu_data { /* in order to detect GP end. */ unsigned long gpnum; /* Highest gp number that this CPU */ /* is aware of having started. */ + unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */ + /* for rcu_all_qs() invocations. */ bool passed_quiesce; /* User-mode/idle loop etc. */ bool qs_pending; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ + bool gpwrap; /* Possible gpnum/completed wrap. */ struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ unsigned long grpmask; /* Mask to apply to leaf qsmask. */ #ifdef CONFIG_RCU_CPU_STALL_INFO @@ -340,14 +337,10 @@ struct rcu_data { #ifdef CONFIG_RCU_NOCB_CPU struct rcu_head *nocb_head; /* CBs waiting for kthread. */ struct rcu_head **nocb_tail; - atomic_long_t nocb_q_count; /* # CBs waiting for kthread */ - atomic_long_t nocb_q_count_lazy; /* (approximate). */ + atomic_long_t nocb_q_count; /* # CBs waiting for nocb */ + atomic_long_t nocb_q_count_lazy; /* invocation (all stages). */ struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */ struct rcu_head **nocb_follower_tail; - atomic_long_t nocb_follower_count; /* # CBs ready to invoke. */ - atomic_long_t nocb_follower_count_lazy; /* (approximate). */ - int nocb_p_count; /* # CBs being invoked by kthread */ - int nocb_p_count_lazy; /* (approximate). */ wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */ struct task_struct *nocb_kthread; int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ @@ -356,8 +349,6 @@ struct rcu_data { struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp; /* CBs waiting for GP. */ struct rcu_head **nocb_gp_tail; - long nocb_gp_count; - long nocb_gp_count_lazy; bool nocb_leader_sleep; /* Is the nocb leader thread asleep? */ struct rcu_data *nocb_next_follower; /* Next follower in wakeup chain. */ @@ -488,10 +479,14 @@ struct rcu_state { /* due to no GP active. */ unsigned long gp_start; /* Time at which GP started, */ /* but in jiffies. */ + unsigned long gp_activity; /* Time of last GP kthread */ + /* activity in jiffies. */ unsigned long jiffies_stall; /* Time at which to check */ /* for CPU stalls. */ unsigned long jiffies_resched; /* Time at which to resched */ /* a reluctant CPU. */ + unsigned long n_force_qs_gpstart; /* Snapshot of n_force_qs at */ + /* GP start. */ unsigned long gp_max; /* Maximum GP duration in */ /* jiffies. */ const char *name; /* Name of structure. */ @@ -514,13 +509,6 @@ extern struct list_head rcu_struct_flavors; #define for_each_rcu_flavor(rsp) \ list_for_each_entry((rsp), &rcu_struct_flavors, flavors) -/* Return values for rcu_preempt_offline_tasks(). */ - -#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ - /* GP were moved to root. */ -#define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */ - /* GP were moved to root. */ - /* * RCU implementation internal declarations: */ @@ -546,27 +534,16 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work); /* Forward declarations for rcutree_plugin.h */ static void rcu_bootup_announce(void); -long rcu_batches_completed(void); static void rcu_preempt_note_context_switch(void); static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU -static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, - unsigned long flags); +static bool rcu_preempt_has_tasks(struct rcu_node *rnp); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_print_detail_task_stall(struct rcu_state *rsp); static int rcu_print_task_stall(struct rcu_node *rnp); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); -#ifdef CONFIG_HOTPLUG_CPU -static int rcu_preempt_offline_tasks(struct rcu_state *rsp, - struct rcu_node *rnp, - struct rcu_data *rdp); -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_preempt_check_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PREEMPT_RCU) -static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, - bool wake); -#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PREEMPT_RCU) */ static void __init __rcu_init_preempt(void); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); @@ -622,24 +599,15 @@ static void rcu_dynticks_task_exit(void); #endif /* #ifndef RCU_TREE_NONCORE */ #ifdef CONFIG_RCU_TRACE -#ifdef CONFIG_RCU_NOCB_CPU -/* Sum up queue lengths for tracing. */ +/* Read out queue lengths for tracing. */ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll) { - *ql = atomic_long_read(&rdp->nocb_q_count) + - rdp->nocb_p_count + - atomic_long_read(&rdp->nocb_follower_count) + - rdp->nocb_p_count + rdp->nocb_gp_count; - *qll = atomic_long_read(&rdp->nocb_q_count_lazy) + - rdp->nocb_p_count_lazy + - atomic_long_read(&rdp->nocb_follower_count_lazy) + - rdp->nocb_p_count_lazy + rdp->nocb_gp_count_lazy; -} +#ifdef CONFIG_RCU_NOCB_CPU + *ql = atomic_long_read(&rdp->nocb_q_count); + *qll = atomic_long_read(&rdp->nocb_q_count_lazy); #else /* #ifdef CONFIG_RCU_NOCB_CPU */ -static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll) -{ *ql = 0; *qll = 0; -} #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ +} #endif /* #ifdef CONFIG_RCU_TRACE */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 3ec85cb5d544..2e850a51bb8f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -34,10 +34,6 @@ #include "../locking/rtmutex_common.h" -/* rcuc/rcub kthread realtime priority */ -static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; -module_param(kthread_prio, int, 0644); - /* * Control variables for per-CPU and per-rcu_node kthreads. These * handle all flavors of RCU. @@ -103,6 +99,8 @@ RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); static struct rcu_state *rcu_state_p = &rcu_preempt_state; static int rcu_preempted_readers_exp(struct rcu_node *rnp); +static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, + bool wake); /* * Tell them what RCU they are running. @@ -114,25 +112,6 @@ static void __init rcu_bootup_announce(void) } /* - * Return the number of RCU-preempt batches processed thus far - * for debug and statistics. - */ -static long rcu_batches_completed_preempt(void) -{ - return rcu_preempt_state.completed; -} -EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt); - -/* - * Return the number of RCU batches processed thus far for debug & stats. - */ -long rcu_batches_completed(void) -{ - return rcu_batches_completed_preempt(); -} -EXPORT_SYMBOL_GPL(rcu_batches_completed); - -/* * Record a preemptible-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is * not in a quiescent state. There might be any number of tasks blocked @@ -307,15 +286,25 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t, } /* + * Return true if the specified rcu_node structure has tasks that were + * preempted within an RCU read-side critical section. + */ +static bool rcu_preempt_has_tasks(struct rcu_node *rnp) +{ + return !list_empty(&rnp->blkd_tasks); +} + +/* * Handle special cases during rcu_read_unlock(), such as needing to * notify RCU core processing or task having blocked during the RCU * read-side critical section. */ void rcu_read_unlock_special(struct task_struct *t) { - int empty; - int empty_exp; - int empty_exp_now; + bool empty; + bool empty_exp; + bool empty_norm; + bool empty_exp_now; unsigned long flags; struct list_head *np; #ifdef CONFIG_RCU_BOOST @@ -367,7 +356,8 @@ void rcu_read_unlock_special(struct task_struct *t) break; raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } - empty = !rcu_preempt_blocked_readers_cgp(rnp); + empty = !rcu_preempt_has_tasks(rnp); + empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); empty_exp = !rcu_preempted_readers_exp(rnp); smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ np = rcu_next_node_entry(t, rnp); @@ -387,13 +377,21 @@ void rcu_read_unlock_special(struct task_struct *t) #endif /* #ifdef CONFIG_RCU_BOOST */ /* + * If this was the last task on the list, go see if we + * need to propagate ->qsmaskinit bit clearing up the + * rcu_node tree. + */ + if (!empty && !rcu_preempt_has_tasks(rnp)) + rcu_cleanup_dead_rnp(rnp); + + /* * If this was the last task on the current list, and if * we aren't waiting on any CPUs, report the quiescent state. * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, * so we must take a snapshot of the expedited state. */ empty_exp_now = !rcu_preempted_readers_exp(rnp); - if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { + if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) { trace_rcu_quiescent_state_report(TPS("preempt_rcu"), rnp->gpnum, 0, rnp->qsmask, @@ -408,10 +406,8 @@ void rcu_read_unlock_special(struct task_struct *t) #ifdef CONFIG_RCU_BOOST /* Unboost if we were boosted. */ - if (drop_boost_mutex) { + if (drop_boost_mutex) rt_mutex_unlock(&rnp->boost_mtx); - complete(&rnp->boost_completion); - } #endif /* #ifdef CONFIG_RCU_BOOST */ /* @@ -519,99 +515,13 @@ static int rcu_print_task_stall(struct rcu_node *rnp) static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) { WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); - if (!list_empty(&rnp->blkd_tasks)) + if (rcu_preempt_has_tasks(rnp)) rnp->gp_tasks = rnp->blkd_tasks.next; WARN_ON_ONCE(rnp->qsmask); } #ifdef CONFIG_HOTPLUG_CPU -/* - * Handle tasklist migration for case in which all CPUs covered by the - * specified rcu_node have gone offline. Move them up to the root - * rcu_node. The reason for not just moving them to the immediate - * parent is to remove the need for rcu_read_unlock_special() to - * make more than two attempts to acquire the target rcu_node's lock. - * Returns true if there were tasks blocking the current RCU grace - * period. - * - * Returns 1 if there was previously a task blocking the current grace - * period on the specified rcu_node structure. - * - * The caller must hold rnp->lock with irqs disabled. - */ -static int rcu_preempt_offline_tasks(struct rcu_state *rsp, - struct rcu_node *rnp, - struct rcu_data *rdp) -{ - struct list_head *lp; - struct list_head *lp_root; - int retval = 0; - struct rcu_node *rnp_root = rcu_get_root(rsp); - struct task_struct *t; - - if (rnp == rnp_root) { - WARN_ONCE(1, "Last CPU thought to be offlined?"); - return 0; /* Shouldn't happen: at least one CPU online. */ - } - - /* If we are on an internal node, complain bitterly. */ - WARN_ON_ONCE(rnp != rdp->mynode); - - /* - * Move tasks up to root rcu_node. Don't try to get fancy for - * this corner-case operation -- just put this node's tasks - * at the head of the root node's list, and update the root node's - * ->gp_tasks and ->exp_tasks pointers to those of this node's, - * if non-NULL. This might result in waiting for more tasks than - * absolutely necessary, but this is a good performance/complexity - * tradeoff. - */ - if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0) - retval |= RCU_OFL_TASKS_NORM_GP; - if (rcu_preempted_readers_exp(rnp)) - retval |= RCU_OFL_TASKS_EXP_GP; - lp = &rnp->blkd_tasks; - lp_root = &rnp_root->blkd_tasks; - while (!list_empty(lp)) { - t = list_entry(lp->next, typeof(*t), rcu_node_entry); - raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ - smp_mb__after_unlock_lock(); - list_del(&t->rcu_node_entry); - t->rcu_blocked_node = rnp_root; - list_add(&t->rcu_node_entry, lp_root); - if (&t->rcu_node_entry == rnp->gp_tasks) - rnp_root->gp_tasks = rnp->gp_tasks; - if (&t->rcu_node_entry == rnp->exp_tasks) - rnp_root->exp_tasks = rnp->exp_tasks; -#ifdef CONFIG_RCU_BOOST - if (&t->rcu_node_entry == rnp->boost_tasks) - rnp_root->boost_tasks = rnp->boost_tasks; -#endif /* #ifdef CONFIG_RCU_BOOST */ - raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ - } - - rnp->gp_tasks = NULL; - rnp->exp_tasks = NULL; -#ifdef CONFIG_RCU_BOOST - rnp->boost_tasks = NULL; - /* - * In case root is being boosted and leaf was not. Make sure - * that we boost the tasks blocking the current grace period - * in this case. - */ - raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ - smp_mb__after_unlock_lock(); - if (rnp_root->boost_tasks != NULL && - rnp_root->boost_tasks != rnp_root->gp_tasks && - rnp_root->boost_tasks != rnp_root->exp_tasks) - rnp_root->boost_tasks = rnp_root->gp_tasks; - raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ -#endif /* #ifdef CONFIG_RCU_BOOST */ - - return retval; -} - #endif /* #ifdef CONFIG_HOTPLUG_CPU */ /* @@ -771,7 +681,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - if (list_empty(&rnp->blkd_tasks)) { + if (!rcu_preempt_has_tasks(rnp)) { raw_spin_unlock_irqrestore(&rnp->lock, flags); } else { rnp->exp_tasks = rnp->blkd_tasks.next; @@ -933,15 +843,6 @@ static void __init rcu_bootup_announce(void) } /* - * Return the number of RCU batches processed thus far for debug & stats. - */ -long rcu_batches_completed(void) -{ - return rcu_batches_completed_sched(); -} -EXPORT_SYMBOL_GPL(rcu_batches_completed); - -/* * Because preemptible RCU does not exist, we never have to check for * CPUs being in quiescent states. */ @@ -960,11 +861,12 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) #ifdef CONFIG_HOTPLUG_CPU -/* Because preemptible RCU does not exist, no quieting of tasks. */ -static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) - __releases(rnp->lock) +/* + * Because there is no preemptible RCU, there can be no readers blocked. + */ +static bool rcu_preempt_has_tasks(struct rcu_node *rnp) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); + return false; } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ @@ -996,23 +898,6 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) WARN_ON_ONCE(rnp->qsmask); } -#ifdef CONFIG_HOTPLUG_CPU - -/* - * Because preemptible RCU does not exist, it never needs to migrate - * tasks that were blocked within RCU read-side critical sections, and - * such non-existent tasks cannot possibly have been blocking the current - * grace period. - */ -static int rcu_preempt_offline_tasks(struct rcu_state *rsp, - struct rcu_node *rnp, - struct rcu_data *rdp) -{ - return 0; -} - -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - /* * Because preemptible RCU does not exist, it never has any callbacks * to check. @@ -1031,20 +916,6 @@ void synchronize_rcu_expedited(void) } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -#ifdef CONFIG_HOTPLUG_CPU - -/* - * Because preemptible RCU does not exist, there is never any need to - * report on tasks preempted in RCU read-side critical sections during - * expedited RCU grace periods. - */ -static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, - bool wake) -{ -} - -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - /* * Because preemptible RCU does not exist, rcu_barrier() is just * another name for rcu_barrier_sched(). @@ -1080,7 +951,7 @@ void exit_rcu(void) static void rcu_initiate_boost_trace(struct rcu_node *rnp) { - if (list_empty(&rnp->blkd_tasks)) + if (!rcu_preempt_has_tasks(rnp)) rnp->n_balk_blkd_tasks++; else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL) rnp->n_balk_exp_gp_tasks++; @@ -1127,7 +998,8 @@ static int rcu_boost(struct rcu_node *rnp) struct task_struct *t; struct list_head *tb; - if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) + if (ACCESS_ONCE(rnp->exp_tasks) == NULL && + ACCESS_ONCE(rnp->boost_tasks) == NULL) return 0; /* Nothing left to boost. */ raw_spin_lock_irqsave(&rnp->lock, flags); @@ -1175,15 +1047,11 @@ static int rcu_boost(struct rcu_node *rnp) */ t = container_of(tb, struct task_struct, rcu_node_entry); rt_mutex_init_proxy_locked(&rnp->boost_mtx, t); - init_completion(&rnp->boost_completion); raw_spin_unlock_irqrestore(&rnp->lock, flags); /* Lock only for side effect: boosts task t's priority. */ rt_mutex_lock(&rnp->boost_mtx); rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */ - /* Wait for boostee to be done w/boost_mtx before reinitializing. */ - wait_for_completion(&rnp->boost_completion); - return ACCESS_ONCE(rnp->exp_tasks) != NULL || ACCESS_ONCE(rnp->boost_tasks) != NULL; } @@ -1416,12 +1284,8 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) if ((mask & 0x1) && cpu != outgoingcpu) cpumask_set_cpu(cpu, cm); - if (cpumask_weight(cm) == 0) { + if (cpumask_weight(cm) == 0) cpumask_setall(cm); - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) - cpumask_clear_cpu(cpu, cm); - WARN_ON_ONCE(cpumask_weight(cm) == 0); - } set_cpus_allowed_ptr(t, cm); free_cpumask_var(cm); } @@ -1446,12 +1310,8 @@ static void __init rcu_spawn_boost_kthreads(void) for_each_possible_cpu(cpu) per_cpu(rcu_cpu_has_work, cpu) = 0; BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); - rnp = rcu_get_root(rcu_state_p); - (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); - if (NUM_RCU_NODES > 1) { - rcu_for_each_leaf_node(rcu_state_p, rnp) - (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); - } + rcu_for_each_leaf_node(rcu_state_p, rnp) + (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); } static void rcu_prepare_kthreads(int cpu) @@ -1605,7 +1465,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) * completed since we last checked and there are * callbacks not yet ready to invoke. */ - if (rdp->completed != rnp->completed && + if ((rdp->completed != rnp->completed || + unlikely(ACCESS_ONCE(rdp->gpwrap))) && rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) note_gp_changes(rsp, rdp); @@ -1898,11 +1759,12 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) ticks_value = rsp->gpnum - rdp->gpnum; } print_cpu_stall_fast_no_hz(fast_no_hz, cpu); - pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n", + pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n", cpu, ticks_value, ticks_title, atomic_read(&rdtp->dynticks) & 0xfff, rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), + ACCESS_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart, fast_no_hz); } @@ -2056,9 +1918,26 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force) static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) { struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); + unsigned long ret; +#ifdef CONFIG_PROVE_RCU struct rcu_head *rhp; +#endif /* #ifdef CONFIG_PROVE_RCU */ - /* No-CBs CPUs might have callbacks on any of three lists. */ + /* + * Check count of all no-CBs callbacks awaiting invocation. + * There needs to be a barrier before this function is called, + * but associated with a prior determination that no more + * callbacks would be posted. In the worst case, the first + * barrier in _rcu_barrier() suffices (but the caller cannot + * necessarily rely on this, not a substitute for the caller + * getting the concurrency design right!). There must also be + * a barrier between the following load an posting of a callback + * (if a callback is in fact needed). This is associated with an + * atomic_inc() in the caller. + */ + ret = atomic_long_read(&rdp->nocb_q_count); + +#ifdef CONFIG_PROVE_RCU rhp = ACCESS_ONCE(rdp->nocb_head); if (!rhp) rhp = ACCESS_ONCE(rdp->nocb_gp_head); @@ -2072,8 +1951,9 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) cpu, rhp->func); WARN_ON_ONCE(1); } +#endif /* #ifdef CONFIG_PROVE_RCU */ - return !!rhp; + return !!ret; } /* @@ -2095,9 +1975,10 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, struct task_struct *t; /* Enqueue the callback on the nocb list and update counts. */ + atomic_long_add(rhcount, &rdp->nocb_q_count); + /* rcu_barrier() relies on ->nocb_q_count add before xchg. */ old_rhpp = xchg(&rdp->nocb_tail, rhtp); ACCESS_ONCE(*old_rhpp) = rhp; - atomic_long_add(rhcount, &rdp->nocb_q_count); atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */ @@ -2288,9 +2169,6 @@ wait_again: /* Move callbacks to wait-for-GP list, which is empty. */ ACCESS_ONCE(rdp->nocb_head) = NULL; rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head); - rdp->nocb_gp_count = atomic_long_xchg(&rdp->nocb_q_count, 0); - rdp->nocb_gp_count_lazy = - atomic_long_xchg(&rdp->nocb_q_count_lazy, 0); gotcbs = true; } @@ -2338,9 +2216,6 @@ wait_again: /* Append callbacks to follower's "done" list. */ tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail); *tail = rdp->nocb_gp_head; - atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count); - atomic_long_add(rdp->nocb_gp_count_lazy, - &rdp->nocb_follower_count_lazy); smp_mb__after_atomic(); /* Store *tail before wakeup. */ if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { /* @@ -2415,13 +2290,11 @@ static int rcu_nocb_kthread(void *arg) trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty"); ACCESS_ONCE(rdp->nocb_follower_head) = NULL; tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head); - c = atomic_long_xchg(&rdp->nocb_follower_count, 0); - cl = atomic_long_xchg(&rdp->nocb_follower_count_lazy, 0); - rdp->nocb_p_count += c; - rdp->nocb_p_count_lazy += cl; /* Each pass through the following loop invokes a callback. */ - trace_rcu_batch_start(rdp->rsp->name, cl, c, -1); + trace_rcu_batch_start(rdp->rsp->name, + atomic_long_read(&rdp->nocb_q_count_lazy), + atomic_long_read(&rdp->nocb_q_count), -1); c = cl = 0; while (list) { next = list->next; @@ -2443,9 +2316,9 @@ static int rcu_nocb_kthread(void *arg) list = next; } trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); - ACCESS_ONCE(rdp->nocb_p_count) = rdp->nocb_p_count - c; - ACCESS_ONCE(rdp->nocb_p_count_lazy) = - rdp->nocb_p_count_lazy - cl; + smp_mb__before_atomic(); /* _add after CB invocation. */ + atomic_long_add(-c, &rdp->nocb_q_count); + atomic_long_add(-cl, &rdp->nocb_q_count_lazy); rdp->n_nocbs_invoked += c; } return 0; diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index 5cdc62e1beeb..fbb6240509ea 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -46,6 +46,8 @@ #define RCU_TREE_NONCORE #include "tree.h" +DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr); + static int r_open(struct inode *inode, struct file *file, const struct seq_operations *op) { @@ -115,11 +117,13 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) if (!rdp->beenonline) return; - seq_printf(m, "%3d%cc=%ld g=%ld pq=%d qp=%d", + seq_printf(m, "%3d%cc=%ld g=%ld pq=%d/%d qp=%d", rdp->cpu, cpu_is_offline(rdp->cpu) ? '!' : ' ', ulong2long(rdp->completed), ulong2long(rdp->gpnum), - rdp->passed_quiesce, rdp->qs_pending); + rdp->passed_quiesce, + rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu), + rdp->qs_pending); seq_printf(m, " dt=%d/%llx/%d df=%lu", atomic_read(&rdp->dynticks->dynticks), rdp->dynticks->dynticks_nesting, diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index 607f852b4d04..7052d3fd4e7b 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -268,6 +268,15 @@ bool try_wait_for_completion(struct completion *x) unsigned long flags; int ret = 1; + /* + * Since x->done will need to be locked only + * in the non-blocking case, we check x->done + * first without taking the lock so we can + * return early in the blocking case. + */ + if (!ACCESS_ONCE(x->done)) + return 0; + spin_lock_irqsave(&x->wait.lock, flags); if (!x->done) ret = 0; @@ -288,13 +297,6 @@ EXPORT_SYMBOL(try_wait_for_completion); */ bool completion_done(struct completion *x) { - unsigned long flags; - int ret = 1; - - spin_lock_irqsave(&x->wait.lock, flags); - if (!x->done) - ret = 0; - spin_unlock_irqrestore(&x->wait.lock, flags); - return ret; + return !!ACCESS_ONCE(x->done); } EXPORT_SYMBOL(completion_done); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5eab11d4b747..1f37fe7f77a4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -119,7 +119,9 @@ void update_rq_clock(struct rq *rq) { s64 delta; - if (rq->skip_clock_update > 0) + lockdep_assert_held(&rq->lock); + + if (rq->clock_skip_update & RQCF_ACT_SKIP) return; delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; @@ -490,6 +492,11 @@ static __init void init_hrtick(void) */ void hrtick_start(struct rq *rq, u64 delay) { + /* + * Don't schedule slices shorter than 10000ns, that just + * doesn't make sense. Rely on vruntime for fairness. + */ + delay = max_t(u64, delay, 10000LL); __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0, HRTIMER_MODE_REL_PINNED, 0); } @@ -1046,7 +1053,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) * this case, we can save a useless back to back clock update. */ if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr)) - rq->skip_clock_update = 1; + rq_clock_skip_update(rq, true); } #ifdef CONFIG_SMP @@ -1082,7 +1089,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) if (p->sched_class->migrate_task_rq) p->sched_class->migrate_task_rq(p, new_cpu); p->se.nr_migrations++; - perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0); + perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); } __set_task_cpu(p, new_cpu); @@ -1836,6 +1843,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->se.prev_sum_exec_runtime = 0; p->se.nr_migrations = 0; p->se.vruntime = 0; +#ifdef CONFIG_SMP + p->se.avg.decay_count = 0; +#endif INIT_LIST_HEAD(&p->se.group_node); #ifdef CONFIG_SCHEDSTATS @@ -2755,6 +2765,10 @@ again: * - explicit schedule() call * - return from syscall or exception to user-space * - return from interrupt-handler to user-space + * + * WARNING: all callers must re-check need_resched() afterward and reschedule + * accordingly in case an event triggered the need for rescheduling (such as + * an interrupt waking up a task) while preemption was disabled in __schedule(). */ static void __sched __schedule(void) { @@ -2763,7 +2777,6 @@ static void __sched __schedule(void) struct rq *rq; int cpu; -need_resched: preempt_disable(); cpu = smp_processor_id(); rq = cpu_rq(cpu); @@ -2783,6 +2796,8 @@ need_resched: smp_mb__before_spinlock(); raw_spin_lock_irq(&rq->lock); + rq->clock_skip_update <<= 1; /* promote REQ to ACT */ + switch_count = &prev->nivcsw; if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { if (unlikely(signal_pending_state(prev->state, prev))) { @@ -2807,13 +2822,13 @@ need_resched: switch_count = &prev->nvcsw; } - if (task_on_rq_queued(prev) || rq->skip_clock_update < 0) + if (task_on_rq_queued(prev)) update_rq_clock(rq); next = pick_next_task(rq, prev); clear_tsk_need_resched(prev); clear_preempt_need_resched(); - rq->skip_clock_update = 0; + rq->clock_skip_update = 0; if (likely(prev != next)) { rq->nr_switches++; @@ -2828,8 +2843,6 @@ need_resched: post_schedule(rq); sched_preempt_enable_no_resched(); - if (need_resched()) - goto need_resched; } static inline void sched_submit_work(struct task_struct *tsk) @@ -2849,7 +2862,9 @@ asmlinkage __visible void __sched schedule(void) struct task_struct *tsk = current; sched_submit_work(tsk); - __schedule(); + do { + __schedule(); + } while (need_resched()); } EXPORT_SYMBOL(schedule); @@ -2884,6 +2899,21 @@ void __sched schedule_preempt_disabled(void) preempt_disable(); } +static void preempt_schedule_common(void) +{ + do { + __preempt_count_add(PREEMPT_ACTIVE); + __schedule(); + __preempt_count_sub(PREEMPT_ACTIVE); + + /* + * Check again in case we missed a preemption opportunity + * between schedule and now. + */ + barrier(); + } while (need_resched()); +} + #ifdef CONFIG_PREEMPT /* * this is the entry point to schedule() from in-kernel preemption @@ -2899,17 +2929,7 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) if (likely(!preemptible())) return; - do { - __preempt_count_add(PREEMPT_ACTIVE); - __schedule(); - __preempt_count_sub(PREEMPT_ACTIVE); - - /* - * Check again in case we missed a preemption opportunity - * between schedule and now. - */ - barrier(); - } while (need_resched()); + preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); EXPORT_SYMBOL(preempt_schedule); @@ -3405,6 +3425,20 @@ static bool check_same_owner(struct task_struct *p) return match; } +static bool dl_param_changed(struct task_struct *p, + const struct sched_attr *attr) +{ + struct sched_dl_entity *dl_se = &p->dl; + + if (dl_se->dl_runtime != attr->sched_runtime || + dl_se->dl_deadline != attr->sched_deadline || + dl_se->dl_period != attr->sched_period || + dl_se->flags != attr->sched_flags) + return true; + + return false; +} + static int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user) @@ -3533,7 +3567,7 @@ recheck: goto change; if (rt_policy(policy) && attr->sched_priority != p->rt_priority) goto change; - if (dl_policy(policy)) + if (dl_policy(policy) && dl_param_changed(p, attr)) goto change; p->sched_reset_on_fork = reset_on_fork; @@ -4225,17 +4259,10 @@ SYSCALL_DEFINE0(sched_yield) return 0; } -static void __cond_resched(void) -{ - __preempt_count_add(PREEMPT_ACTIVE); - __schedule(); - __preempt_count_sub(PREEMPT_ACTIVE); -} - int __sched _cond_resched(void) { if (should_resched()) { - __cond_resched(); + preempt_schedule_common(); return 1; } return 0; @@ -4260,7 +4287,7 @@ int __cond_resched_lock(spinlock_t *lock) if (spin_needbreak(lock) || resched) { spin_unlock(lock); if (resched) - __cond_resched(); + preempt_schedule_common(); else cpu_relax(); ret = 1; @@ -4276,7 +4303,7 @@ int __sched __cond_resched_softirq(void) if (should_resched()) { local_bh_enable(); - __cond_resched(); + preempt_schedule_common(); local_bh_disable(); return 1; } @@ -4531,9 +4558,10 @@ void sched_show_task(struct task_struct *p) { unsigned long free = 0; int ppid; - unsigned state; + unsigned long state = p->state; - state = p->state ? __ffs(p->state) + 1 : 0; + if (state) + state = __ffs(state) + 1; printk(KERN_INFO "%-15.15s %c", p->comm, state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); #if BITS_PER_LONG == 32 @@ -4766,7 +4794,7 @@ static struct rq *move_queued_task(struct task_struct *p, int new_cpu) void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { - if (p->sched_class && p->sched_class->set_cpus_allowed) + if (p->sched_class->set_cpus_allowed) p->sched_class->set_cpus_allowed(p, new_mask); cpumask_copy(&p->cpus_allowed, new_mask); @@ -7276,6 +7304,11 @@ void __init sched_init(void) enter_lazy_tlb(&init_mm, current); /* + * During early bootup we pretend to be a normal task: + */ + current->sched_class = &fair_sched_class; + + /* * Make us the idle thread. Technically, schedule() should not be * called from this thread, however somewhere below it might be, * but because we are the idle thread, we just pick up running again @@ -7285,11 +7318,6 @@ void __init sched_init(void) calc_load_update = jiffies + LOAD_FREQ; - /* - * During early bootup we pretend to be a normal task: - */ - current->sched_class = &fair_sched_class; - #ifdef CONFIG_SMP zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); /* May be allocated at isolcpus cmdline parse time */ @@ -7350,6 +7378,9 @@ void ___might_sleep(const char *file, int line, int preempt_offset) in_atomic(), irqs_disabled(), current->pid, current->comm); + if (task_stack_end_corrupted(current)) + printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); + debug_show_held_locks(current); if (irqs_disabled()) print_irqtrace_events(current); diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index 539ca3ce071b..c6acb07466bb 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c @@ -107,7 +107,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, int best_cpu = -1; const struct sched_dl_entity *dl_se = &p->dl; - if (later_mask && cpumask_and(later_mask, later_mask, cp->free_cpus)) { + if (later_mask && + cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) { best_cpu = cpumask_any(later_mask); goto out; } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) && @@ -186,6 +187,26 @@ out: } /* + * cpudl_set_freecpu - Set the cpudl.free_cpus + * @cp: the cpudl max-heap context + * @cpu: rd attached cpu + */ +void cpudl_set_freecpu(struct cpudl *cp, int cpu) +{ + cpumask_set_cpu(cpu, cp->free_cpus); +} + +/* + * cpudl_clear_freecpu - Clear the cpudl.free_cpus + * @cp: the cpudl max-heap context + * @cpu: rd attached cpu + */ +void cpudl_clear_freecpu(struct cpudl *cp, int cpu) +{ + cpumask_clear_cpu(cpu, cp->free_cpus); +} + +/* * cpudl_init - initialize the cpudl structure * @cp: the cpudl max-heap context */ @@ -203,7 +224,7 @@ int cpudl_init(struct cpudl *cp) if (!cp->elements) return -ENOMEM; - if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) { kfree(cp->elements); return -ENOMEM; } @@ -211,8 +232,6 @@ int cpudl_init(struct cpudl *cp) for_each_possible_cpu(i) cp->elements[i].idx = IDX_INVALID; - cpumask_setall(cp->free_cpus); - return 0; } diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h index 020039bd1326..1a0a6ef2fbe1 100644 --- a/kernel/sched/cpudeadline.h +++ b/kernel/sched/cpudeadline.h @@ -24,6 +24,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, struct cpumask *later_mask); void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid); int cpudl_init(struct cpudl *cp); +void cpudl_set_freecpu(struct cpudl *cp, int cpu); +void cpudl_clear_freecpu(struct cpudl *cp, int cpu); void cpudl_cleanup(struct cpudl *cp); #endif /* CONFIG_SMP */ diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 726470d47f87..a027799ae130 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -350,6 +350,11 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; dl_se->runtime = pi_se->dl_runtime; } + + if (dl_se->dl_yielded) + dl_se->dl_yielded = 0; + if (dl_se->dl_throttled) + dl_se->dl_throttled = 0; } /* @@ -536,23 +541,19 @@ again: sched_clock_tick(); update_rq_clock(rq); - dl_se->dl_throttled = 0; - dl_se->dl_yielded = 0; - if (task_on_rq_queued(p)) { - enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); - if (dl_task(rq->curr)) - check_preempt_curr_dl(rq, p, 0); - else - resched_curr(rq); + enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); + if (dl_task(rq->curr)) + check_preempt_curr_dl(rq, p, 0); + else + resched_curr(rq); #ifdef CONFIG_SMP - /* - * Queueing this task back might have overloaded rq, - * check if we need to kick someone away. - */ - if (has_pushable_dl_tasks(rq)) - push_dl_task(rq); + /* + * Queueing this task back might have overloaded rq, + * check if we need to kick someone away. + */ + if (has_pushable_dl_tasks(rq)) + push_dl_task(rq); #endif - } unlock: raw_spin_unlock(&rq->lock); @@ -613,10 +614,9 @@ static void update_curr_dl(struct rq *rq) dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec; if (dl_runtime_exceeded(rq, dl_se)) { + dl_se->dl_throttled = 1; __dequeue_task_dl(rq, curr, 0); - if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted))) - dl_se->dl_throttled = 1; - else + if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted))) enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); if (!is_leftmost(curr, &rq->dl)) @@ -853,7 +853,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) * its rq, the bandwidth timer callback (which clearly has not * run yet) will take care of this. */ - if (p->dl.dl_throttled) + if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) return; enqueue_dl_entity(&p->dl, pi_se, flags); @@ -1073,7 +1073,13 @@ static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued) { update_curr_dl(rq); - if (hrtick_enabled(rq) && queued && p->dl.runtime > 0) + /* + * Even when we have runtime, update_curr_dl() might have resulted in us + * not being the leftmost task anymore. In that case NEED_RESCHED will + * be set and schedule() will start a new hrtick for the next task. + */ + if (hrtick_enabled(rq) && queued && p->dl.runtime > 0 && + is_leftmost(p, &rq->dl)) start_hrtick_dl(rq, p); } @@ -1166,9 +1172,6 @@ static int find_later_rq(struct task_struct *task) * We have to consider system topology and task affinity * first, then we can look for a suitable cpu. */ - cpumask_copy(later_mask, task_rq(task)->rd->span); - cpumask_and(later_mask, later_mask, cpu_active_mask); - cpumask_and(later_mask, later_mask, &task->cpus_allowed); best_cpu = cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask); if (best_cpu == -1) @@ -1563,6 +1566,7 @@ static void rq_online_dl(struct rq *rq) if (rq->dl.overloaded) dl_set_overload(rq); + cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu); if (rq->dl.dl_nr_running > 0) cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1); } @@ -1574,6 +1578,7 @@ static void rq_offline_dl(struct rq *rq) dl_clear_overload(rq); cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0); + cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu); } void init_sched_dl_class(void) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 92cc52001e74..8baaf858d25c 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -305,6 +305,7 @@ do { \ PN(next_balance); SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr))); PN(clock); + PN(clock_task); P(cpu_load[0]); P(cpu_load[1]); P(cpu_load[2]); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index fe331fc391f5..7ce18f3c097a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -676,7 +676,6 @@ void init_task_runnable_average(struct task_struct *p) { u32 slice; - p->se.avg.decay_count = 0; slice = sched_slice(task_cfs_rq(p), &p->se) >> 10; p->se.avg.runnable_avg_sum = slice; p->se.avg.runnable_avg_period = slice; @@ -2574,11 +2573,11 @@ static inline u64 __synchronize_entity_decay(struct sched_entity *se) u64 decays = atomic64_read(&cfs_rq->decay_counter); decays -= se->avg.decay_count; + se->avg.decay_count = 0; if (!decays) return 0; se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays); - se->avg.decay_count = 0; return decays; } @@ -5157,7 +5156,7 @@ static void yield_task_fair(struct rq *rq) * so we don't do microscopic update in schedule() * and double the fastpath cost. */ - rq->skip_clock_update = 1; + rq_clock_skip_update(rq, true); } set_skip_buddy(se); @@ -5949,8 +5948,8 @@ static unsigned long scale_rt_capacity(int cpu) */ age_stamp = ACCESS_ONCE(rq->age_stamp); avg = ACCESS_ONCE(rq->rt_avg); + delta = __rq_clock_broken(rq) - age_stamp; - delta = rq_clock(rq) - age_stamp; if (unlikely(delta < 0)) delta = 0; diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index c47fce75e666..aaf1c1d5cf5d 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -47,7 +47,8 @@ static inline int cpu_idle_poll(void) rcu_idle_enter(); trace_cpu_idle_rcuidle(0, smp_processor_id()); local_irq_enable(); - while (!tif_need_resched()) + while (!tif_need_resched() && + (cpu_idle_force_poll || tick_check_broadcast_expired())) cpu_relax(); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); rcu_idle_exit(); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index ee15f5a0d1c1..f4d4b077eba0 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -831,11 +831,14 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) enqueue = 1; /* - * Force a clock update if the CPU was idle, - * lest wakeup -> unthrottle time accumulate. + * When we're idle and a woken (rt) task is + * throttled check_preempt_curr() will set + * skip_update and the time between the wakeup + * and this unthrottle will get accounted as + * 'runtime'. */ if (rt_rq->rt_nr_running && rq->curr == rq->idle) - rq->skip_clock_update = -1; + rq_clock_skip_update(rq, false); } if (rt_rq->rt_time || rt_rq->rt_nr_running) idle = 0; @@ -1337,7 +1340,12 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) curr->prio <= p->prio)) { int target = find_lowest_rq(p); - if (target != -1) + /* + * Don't bother moving it if the destination CPU is + * not running a lower priority task. + */ + if (target != -1 && + p->prio < cpu_rq(target)->rt.highest_prio.curr) cpu = target; } rcu_read_unlock(); @@ -1614,6 +1622,16 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) lowest_rq = cpu_rq(cpu); + if (lowest_rq->rt.highest_prio.curr <= task->prio) { + /* + * Target rq has tasks of equal or higher priority, + * retrying does not release any lock and is unlikely + * to yield a different result. + */ + lowest_rq = NULL; + break; + } + /* if the prio of this runqueue changed, try again */ if (double_lock_balance(rq, lowest_rq)) { /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9a2a45c970e7..0870db23d79c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -558,8 +558,6 @@ struct rq { #ifdef CONFIG_NO_HZ_FULL unsigned long last_sched_tick; #endif - int skip_clock_update; - /* capture load from *all* tasks on this cpu: */ struct load_weight load; unsigned long nr_load_updates; @@ -588,6 +586,7 @@ struct rq { unsigned long next_balance; struct mm_struct *prev_mm; + unsigned int clock_skip_update; u64 clock; u64 clock_task; @@ -687,16 +686,35 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); #define cpu_curr(cpu) (cpu_rq(cpu)->curr) #define raw_rq() raw_cpu_ptr(&runqueues) +static inline u64 __rq_clock_broken(struct rq *rq) +{ + return ACCESS_ONCE(rq->clock); +} + static inline u64 rq_clock(struct rq *rq) { + lockdep_assert_held(&rq->lock); return rq->clock; } static inline u64 rq_clock_task(struct rq *rq) { + lockdep_assert_held(&rq->lock); return rq->clock_task; } +#define RQCF_REQ_SKIP 0x01 +#define RQCF_ACT_SKIP 0x02 + +static inline void rq_clock_skip_update(struct rq *rq, bool skip) +{ + lockdep_assert_held(&rq->lock); + if (skip) + rq->clock_skip_update |= RQCF_REQ_SKIP; + else + rq->clock_skip_update &= ~RQCF_REQ_SKIP; +} + #ifdef CONFIG_NUMA enum numa_topology_type { NUMA_DIRECT, diff --git a/kernel/softirq.c b/kernel/softirq.c index 501baa9ac1be..479e4436f787 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -114,8 +114,12 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) trace_softirqs_off(ip); raw_local_irq_restore(flags); - if (preempt_count() == cnt) + if (preempt_count() == cnt) { +#ifdef CONFIG_DEBUG_PREEMPT + current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1); +#endif trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); + } } EXPORT_SYMBOL(__local_bh_disable_ip); #endif /* CONFIG_TRACE_IRQFLAGS */ @@ -656,9 +660,8 @@ static void run_ksoftirqd(unsigned int cpu) * in the task stack here. */ __do_softirq(); - rcu_note_context_switch(); local_irq_enable(); - cond_resched(); + cond_resched_rcu_qs(); return; } local_irq_enable(); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index d8c724cda37b..3f5e183c3d97 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -266,7 +266,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) /* * Divide a ktime value by a nanosecond value */ -u64 ktime_divns(const ktime_t kt, s64 div) +u64 __ktime_divns(const ktime_t kt, s64 div) { u64 dclc; int sft = 0; @@ -282,7 +282,7 @@ u64 ktime_divns(const ktime_t kt, s64 div) return dclc; } -EXPORT_SYMBOL_GPL(ktime_divns); +EXPORT_SYMBOL_GPL(__ktime_divns); #endif /* BITS_PER_LONG >= 64 */ /* @@ -440,6 +440,37 @@ static inline void debug_deactivate(struct hrtimer *timer) trace_hrtimer_cancel(timer); } +#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) +static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) +{ + struct hrtimer_clock_base *base = cpu_base->clock_base; + ktime_t expires, expires_next = { .tv64 = KTIME_MAX }; + int i; + + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { + struct timerqueue_node *next; + struct hrtimer *timer; + + next = timerqueue_getnext(&base->active); + if (!next) + continue; + + timer = container_of(next, struct hrtimer, node); + expires = ktime_sub(hrtimer_get_expires(timer), base->offset); + if (expires.tv64 < expires_next.tv64) + expires_next = expires; + } + /* + * clock_was_set() might have changed base->offset of any of + * the clock bases so the result might be negative. Fix it up + * to prevent a false positive in clockevents_program_event(). + */ + if (expires_next.tv64 < 0) + expires_next.tv64 = 0; + return expires_next; +} +#endif + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -488,32 +519,7 @@ static inline int hrtimer_hres_active(void) static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { - int i; - struct hrtimer_clock_base *base = cpu_base->clock_base; - ktime_t expires, expires_next; - - expires_next.tv64 = KTIME_MAX; - - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { - struct hrtimer *timer; - struct timerqueue_node *next; - - next = timerqueue_getnext(&base->active); - if (!next) - continue; - timer = container_of(next, struct hrtimer, node); - - expires = ktime_sub(hrtimer_get_expires(timer), base->offset); - /* - * clock_was_set() has changed base->offset so the - * result might be negative. Fix it up to prevent a - * false positive in clockevents_program_event() - */ - if (expires.tv64 < 0) - expires.tv64 = 0; - if (expires.tv64 < expires_next.tv64) - expires_next = expires; - } + ktime_t expires_next = __hrtimer_get_next_event(cpu_base); if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64) return; @@ -587,6 +593,15 @@ static int hrtimer_reprogram(struct hrtimer *timer, return 0; /* + * When the target cpu of the timer is currently executing + * hrtimer_interrupt(), then we do not touch the clock event + * device. hrtimer_interrupt() will reevaluate all clock bases + * before reprogramming the device. + */ + if (cpu_base->in_hrtirq) + return 0; + + /* * If a hang was detected in the last timer interrupt then we * do not schedule a timer which is earlier than the expiry * which we enforced in the hang detection. We want the system @@ -1104,29 +1119,14 @@ EXPORT_SYMBOL_GPL(hrtimer_get_remaining); ktime_t hrtimer_get_next_event(void) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); - struct hrtimer_clock_base *base = cpu_base->clock_base; - ktime_t delta, mindelta = { .tv64 = KTIME_MAX }; + ktime_t mindelta = { .tv64 = KTIME_MAX }; unsigned long flags; - int i; raw_spin_lock_irqsave(&cpu_base->lock, flags); - if (!hrtimer_hres_active()) { - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { - struct hrtimer *timer; - struct timerqueue_node *next; - - next = timerqueue_getnext(&base->active); - if (!next) - continue; - - timer = container_of(next, struct hrtimer, node); - delta.tv64 = hrtimer_get_expires_tv64(timer); - delta = ktime_sub(delta, base->get_time()); - if (delta.tv64 < mindelta.tv64) - mindelta.tv64 = delta.tv64; - } - } + if (!hrtimer_hres_active()) + mindelta = ktime_sub(__hrtimer_get_next_event(cpu_base), + ktime_get()); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); @@ -1253,7 +1253,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) raw_spin_lock(&cpu_base->lock); entry_time = now = hrtimer_update_base(cpu_base); retry: - expires_next.tv64 = KTIME_MAX; + cpu_base->in_hrtirq = 1; /* * We set expires_next to KTIME_MAX here with cpu_base->lock * held to prevent that a timer is enqueued in our queue via @@ -1291,28 +1291,20 @@ retry: * are right-of a not yet expired timer, because that * timer will have to trigger a wakeup anyway. */ - - if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) { - ktime_t expires; - - expires = ktime_sub(hrtimer_get_expires(timer), - base->offset); - if (expires.tv64 < 0) - expires.tv64 = KTIME_MAX; - if (expires.tv64 < expires_next.tv64) - expires_next = expires; + if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) break; - } __run_hrtimer(timer, &basenow); } } - + /* Reevaluate the clock bases for the next expiry */ + expires_next = __hrtimer_get_next_event(cpu_base); /* * Store the new expiry value so the migration code can verify * against it. */ cpu_base->expires_next = expires_next; + cpu_base->in_hrtirq = 0; raw_spin_unlock(&cpu_base->lock); /* Reprogramming necessary ? */ diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 28bf91c60a0b..4b585e0fdd22 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -488,13 +488,13 @@ static void sync_cmos_clock(struct work_struct *work) getnstimeofday64(&now); if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec * 5) { - struct timespec adjust = timespec64_to_timespec(now); + struct timespec64 adjust = now; fail = -ENODEV; if (persistent_clock_is_local) adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); #ifdef CONFIG_GENERIC_CMOS_UPDATE - fail = update_persistent_clock(adjust); + fail = update_persistent_clock(timespec64_to_timespec(adjust)); #endif #ifdef CONFIG_RTC_SYSTOHC if (fail == -ENODEV) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 6a931852082f..b124af259800 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1659,24 +1659,24 @@ out: } /** - * getboottime - Return the real time of system boot. - * @ts: pointer to the timespec to be set + * getboottime64 - Return the real time of system boot. + * @ts: pointer to the timespec64 to be set * - * Returns the wall-time of boot in a timespec. + * Returns the wall-time of boot in a timespec64. * * This is based on the wall_to_monotonic offset and the total suspend * time. Calls to settimeofday will affect the value returned (which * basically means that however wrong your real time clock is at boot time, * you get the right time here). */ -void getboottime(struct timespec *ts) +void getboottime64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot); - *ts = ktime_to_timespec(t); + *ts = ktime_to_timespec64(t); } -EXPORT_SYMBOL_GPL(getboottime); +EXPORT_SYMBOL_GPL(getboottime64); unsigned long get_seconds(void) { diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 4b9c114ee9de..6fa484de2ba1 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -261,7 +261,7 @@ void perf_trace_del(struct perf_event *p_event, int flags) } void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs *regs, int *rctxp) + struct pt_regs **regs, int *rctxp) { struct trace_entry *entry; unsigned long flags; @@ -280,6 +280,8 @@ void *perf_trace_buf_prepare(int size, unsigned short type, if (*rctxp < 0) return NULL; + if (regs) + *regs = this_cpu_ptr(&__perf_regs[*rctxp]); raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]); /* zero the dead bytes from align to not leak stack to user */ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5edb518be345..296079ae6583 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1148,7 +1148,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); + entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); if (!entry) return; @@ -1179,7 +1179,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); + entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); if (!entry) return; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index c6ee36fcbf90..f97f6e3a676c 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -574,7 +574,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) size -= sizeof(u32); rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, - sys_data->enter_event->event.type, regs, &rctx); + sys_data->enter_event->event.type, NULL, &rctx); if (!rec) return; @@ -647,7 +647,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) size -= sizeof(u32); rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, - sys_data->exit_event->event.type, regs, &rctx); + sys_data->exit_event->event.type, NULL, &rctx); if (!rec) return; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 8520acc34b18..b11441321e7a 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1111,7 +1111,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, if (hlist_empty(head)) goto out; - entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); + entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); if (!entry) goto out; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 5f2ce616c046..a2ca213c71ca 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1215,6 +1215,7 @@ config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL select TORTURE_TEST + select SRCU default n help This option provides a kernel module that runs torture tests @@ -1257,7 +1258,7 @@ config RCU_CPU_STALL_TIMEOUT config RCU_CPU_STALL_INFO bool "Print additional diagnostics on RCU CPU stall" depends on (TREE_RCU || PREEMPT_RCU) && DEBUG_KERNEL - default n + default y help For each stalled CPU that is aware of the current RCU grace period, print out additional per-CPU diagnostic information diff --git a/mm/Kconfig b/mm/Kconfig index 1d1ae6b078fd..4395b12869c8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -325,6 +325,7 @@ config VIRT_TO_BUS config MMU_NOTIFIER bool + select SRCU config KSM bool "Enable KSM for page merging" diff --git a/security/tomoyo/Kconfig b/security/tomoyo/Kconfig index 8eb779b9d77f..604e718d68d3 100644 --- a/security/tomoyo/Kconfig +++ b/security/tomoyo/Kconfig @@ -5,6 +5,7 @@ config SECURITY_TOMOYO select SECURITYFS select SECURITY_PATH select SECURITY_NETWORK + select SRCU default n help This selects TOMOYO Linux, pathname-based access control. diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index 86ea2d7b8845..d2b18e887071 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -1,3 +1,4 @@ +#define _GNU_SOURCE #include <errno.h> #include <stdio.h> #include <stdlib.h> @@ -98,3 +99,45 @@ char *debugfs_mount(const char *mountpoint) out: return debugfs_mountpoint; } + +int debugfs__strerror_open(int err, char *buf, size_t size, const char *filename) +{ + char sbuf[128]; + + switch (err) { + case ENOENT: + if (debugfs_found) { + snprintf(buf, size, + "Error:\tFile %s/%s not found.\n" + "Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n", + debugfs_mountpoint, filename); + break; + } + snprintf(buf, size, "%s", + "Error:\tUnable to find debugfs\n" + "Hint:\tWas your kernel compiled with debugfs support?\n" + "Hint:\tIs the debugfs filesystem mounted?\n" + "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); + break; + case EACCES: + snprintf(buf, size, + "Error:\tNo permissions to read %s/%s\n" + "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", + debugfs_mountpoint, filename, debugfs_mountpoint); + break; + default: + snprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf))); + break; + } + + return 0; +} + +int debugfs__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name) +{ + char path[PATH_MAX]; + + snprintf(path, PATH_MAX, "tracing/events/%s/%s", sys, name ?: "*"); + + return debugfs__strerror_open(err, buf, size, path); +} diff --git a/tools/lib/api/fs/debugfs.h b/tools/lib/api/fs/debugfs.h index f19d3df9609d..0739881a9897 100644 --- a/tools/lib/api/fs/debugfs.h +++ b/tools/lib/api/fs/debugfs.h @@ -26,4 +26,7 @@ char *debugfs_mount(const char *mountpoint); extern char debugfs_mountpoint[]; +int debugfs__strerror_open(int err, char *buf, size_t size, const char *filename); +int debugfs__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name); + #endif /* __API_DEBUGFS_H__ */ diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index cf3a44bf1ec3..afe20ed9fac8 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -32,6 +32,7 @@ #include <stdint.h> #include <limits.h> +#include <netinet/ip6.h> #include "event-parse.h" #include "event-utils.h" @@ -4149,6 +4150,324 @@ static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size, trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); } +static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf) +{ + const char *fmt; + + if (i == 'i') + fmt = "%03d.%03d.%03d.%03d"; + else + fmt = "%d.%d.%d.%d"; + + trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]); +} + +static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) +{ + return ((unsigned long)(a->s6_addr32[0] | a->s6_addr32[1]) | + (unsigned long)(a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0UL; +} + +static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr) +{ + return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); +} + +static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr) +{ + int i, j, range; + unsigned char zerolength[8]; + int longest = 1; + int colonpos = -1; + uint16_t word; + uint8_t hi, lo; + bool needcolon = false; + bool useIPv4; + struct in6_addr in6; + + memcpy(&in6, addr, sizeof(struct in6_addr)); + + useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6); + + memset(zerolength, 0, sizeof(zerolength)); + + if (useIPv4) + range = 6; + else + range = 8; + + /* find position of longest 0 run */ + for (i = 0; i < range; i++) { + for (j = i; j < range; j++) { + if (in6.s6_addr16[j] != 0) + break; + zerolength[i]++; + } + } + for (i = 0; i < range; i++) { + if (zerolength[i] > longest) { + longest = zerolength[i]; + colonpos = i; + } + } + if (longest == 1) /* don't compress a single 0 */ + colonpos = -1; + + /* emit address */ + for (i = 0; i < range; i++) { + if (i == colonpos) { + if (needcolon || i == 0) + trace_seq_printf(s, ":"); + trace_seq_printf(s, ":"); + needcolon = false; + i += longest - 1; + continue; + } + if (needcolon) { + trace_seq_printf(s, ":"); + needcolon = false; + } + /* hex u16 without leading 0s */ + word = ntohs(in6.s6_addr16[i]); + hi = word >> 8; + lo = word & 0xff; + if (hi) + trace_seq_printf(s, "%x%02x", hi, lo); + else + trace_seq_printf(s, "%x", lo); + + needcolon = true; + } + + if (useIPv4) { + if (needcolon) + trace_seq_printf(s, ":"); + print_ip4_addr(s, 'I', &in6.s6_addr[12]); + } + + return; +} + +static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf) +{ + int j; + + for (j = 0; j < 16; j += 2) { + trace_seq_printf(s, "%02x%02x", buf[j], buf[j+1]); + if (i == 'I' && j < 14) + trace_seq_printf(s, ":"); + } +} + +/* + * %pi4 print an IPv4 address with leading zeros + * %pI4 print an IPv4 address without leading zeros + * %pi6 print an IPv6 address without colons + * %pI6 print an IPv6 address with colons + * %pI6c print an IPv6 address in compressed form with colons + * %pISpc print an IP address based on sockaddr; p adds port. + */ +static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + unsigned char *buf; + + if (arg->type == PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return 0; + } + + if (arg->type != PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return 0; + } + + if (!arg->field.field) { + arg->field.field = + pevent_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return 0; + } + } + + buf = data + arg->field.field->offset; + + if (arg->field.field->size != 4) { + trace_seq_printf(s, "INVALIDIPv4"); + return 0; + } + print_ip4_addr(s, i, buf); + + return 0; +} + +static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + char have_c = 0; + unsigned char *buf; + int rc = 0; + + /* pI6c */ + if (i == 'I' && *ptr == 'c') { + have_c = 1; + ptr++; + rc++; + } + + if (arg->type == PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return rc; + } + + if (arg->type != PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return rc; + } + + if (!arg->field.field) { + arg->field.field = + pevent_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return rc; + } + } + + buf = data + arg->field.field->offset; + + if (arg->field.field->size != 16) { + trace_seq_printf(s, "INVALIDIPv6"); + return rc; + } + + if (have_c) + print_ip6c_addr(s, buf); + else + print_ip6_addr(s, i, buf); + + return rc; +} + +static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + char have_c = 0, have_p = 0; + unsigned char *buf; + struct sockaddr_storage *sa; + int rc = 0; + + /* pISpc */ + if (i == 'I') { + if (*ptr == 'p') { + have_p = 1; + ptr++; + rc++; + } + if (*ptr == 'c') { + have_c = 1; + ptr++; + rc++; + } + } + + if (arg->type == PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return rc; + } + + if (arg->type != PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return rc; + } + + if (!arg->field.field) { + arg->field.field = + pevent_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return rc; + } + } + + sa = (struct sockaddr_storage *) (data + arg->field.field->offset); + + if (sa->ss_family == AF_INET) { + struct sockaddr_in *sa4 = (struct sockaddr_in *) sa; + + if (arg->field.field->size < sizeof(struct sockaddr_in)) { + trace_seq_printf(s, "INVALIDIPv4"); + return rc; + } + + print_ip4_addr(s, i, (unsigned char *) &sa4->sin_addr); + if (have_p) + trace_seq_printf(s, ":%d", ntohs(sa4->sin_port)); + + + } else if (sa->ss_family == AF_INET6) { + struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) sa; + + if (arg->field.field->size < sizeof(struct sockaddr_in6)) { + trace_seq_printf(s, "INVALIDIPv6"); + return rc; + } + + if (have_p) + trace_seq_printf(s, "["); + + buf = (unsigned char *) &sa6->sin6_addr; + if (have_c) + print_ip6c_addr(s, buf); + else + print_ip6_addr(s, i, buf); + + if (have_p) + trace_seq_printf(s, "]:%d", ntohs(sa6->sin6_port)); + } + + return rc; +} + +static int print_ip_arg(struct trace_seq *s, const char *ptr, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + char i = *ptr; /* 'i' or 'I' */ + char ver; + int rc = 0; + + ptr++; + rc++; + + ver = *ptr; + ptr++; + rc++; + + switch (ver) { + case '4': + rc += print_ipv4_arg(s, ptr, i, data, size, event, arg); + break; + case '6': + rc += print_ipv6_arg(s, ptr, i, data, size, event, arg); + break; + case 'S': + rc += print_ipsa_arg(s, ptr, i, data, size, event, arg); + break; + default: + return 0; + } + + return rc; +} + static int is_printable_array(char *p, unsigned int len) { unsigned int i; @@ -4337,6 +4656,15 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event ptr++; arg = arg->next; break; + } else if (*(ptr+1) == 'I' || *(ptr+1) == 'i') { + int n; + + n = print_ip_arg(s, ptr+1, data, size, event, arg); + if (n > 0) { + ptr += n; + arg = arg->next; + break; + } } /* fall through */ diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index fd77d81ea748..0294c57b1f5e 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -38,7 +38,7 @@ OPTIONS --remove=:: Remove specified file from the cache. -M:: ---missing=:: +--missing=:: List missing build ids in the cache for the specified file. -u:: --update:: diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index cbb4f743d921..3e2aec94f806 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -89,6 +89,19 @@ raw encoding of 0x1A8 can be used: You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. +PARAMETERIZED EVENTS +-------------------- + +Some pmu events listed by 'perf-list' will be displayed with '?' in them. For +example: + + hv_gpci/dtbp_ptitc,phys_processor_idx=?/ + +This means that when provided as an event, a value for '?' must +also be supplied. For example: + + perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ... + OPTIONS ------- diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 1d78a4064da4..43310d8661fe 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -12,11 +12,12 @@ SYNOPSIS DESCRIPTION ----------- -"perf mem -t <TYPE> record" runs a command and gathers memory operation data +"perf mem record" runs a command and gathers memory operation data from it, into perf.data. Perf record options are accepted and are passed through. -"perf mem -t <TYPE> report" displays the result. It invokes perf report with the -right set of options to display a memory access profile. +"perf mem report" displays the result. It invokes perf report with the +right set of options to display a memory access profile. By default, loads +and stores are sampled. Use the -t option to limit to loads or stores. Note that on Intel systems the memory latency reported is the use-latency, not the pure load (or store latency). Use latency includes any pipeline @@ -29,7 +30,7 @@ OPTIONS -t:: --type=:: - Select the memory operation type: load or store (default: load) + Select the memory operation type: load or store (default: load,store) -D:: --dump-raw-samples=:: diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index af9a54ece024..31e977459c51 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -33,12 +33,27 @@ OPTIONS - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a hexadecimal event descriptor. - - a hardware breakpoint event in the form of '\mem:addr[:access]' + - a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where + 'param1', 'param2', etc are defined as formats for the PMU in + /sys/bus/event_sources/devices/<pmu>/format/*. + + - a symbolically formed event like 'pmu/config=M,config1=N,config3=K/' + + where M, N, K are numbers (in decimal, hex, octal format). Acceptable + values for each of 'config', 'config1' and 'config2' are defined by + corresponding entries in /sys/bus/event_sources/devices/<pmu>/format/* + param1 and param2 are defined as formats for the PMU in: + /sys/bus/event_sources/devices/<pmu>/format/* + + - a hardware breakpoint event in the form of '\mem:addr[/len][:access]' where addr is the address in memory you want to break in. Access is the memory access type (read, write, execute) it can - be passed as follows: '\mem:addr[:[r][w][x]]'. + be passed as follows: '\mem:addr[:[r][w][x]]'. len is the range, + number of bytes from specified addr, which the breakpoint will cover. If you want to profile read-write accesses in 0x1000, just set 'mem:0x1000:rw'. + If you want to profile write accesses in [0x1000~1008), just set + 'mem:0x1000/8:w'. --filter=<filter>:: Event filter. diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 21494806c0ab..a21eec05bc42 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -125,46 +125,46 @@ OPTIONS is equivalent to: perf script -f trace:<fields> -f sw:<fields> -f hw:<fields> - + i.e., the specified fields apply to all event types if the type string is not given. - + The arguments are processed in the order received. A later usage can reset a prior request. e.g.: - + -f trace: -f comm,tid,time,ip,sym - + The first -f suppresses trace events (field list is ""), but then the second invocation sets the fields to comm,tid,time,ip,sym. In this case a warning is given to the user: - + "Overriding previous field request for all events." - + Alternatively, consider the order: - + -f comm,tid,time,ip,sym -f trace: - + The first -f sets the fields for all events and the second -f suppresses trace events. The user is given a warning message about the override, and the result of the above is that only S/W and H/W events are displayed with the given fields. - + For the 'wildcard' option if a user selected field is invalid for an event type, a message is displayed to the user that the option is ignored for that type. For example: - + $ perf script -f comm,tid,trace 'trace' not valid for hardware events. Ignoring. 'trace' not valid for software events. Ignoring. - + Alternatively, if the type is given an invalid field is specified it is an error. For example: - + perf script -v -f sw:comm,tid,trace 'trace' not valid for software events. - + At this point usage is displayed, and perf-script exits. - + Finally, a user may not set fields to none for all event types. i.e., -f "" is not allowed. diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 29ee857c09c6..04e150d83e7d 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -25,10 +25,22 @@ OPTIONS -e:: --event=:: - Select the PMU event. Selection can be a symbolic event name - (use 'perf list' to list all events) or a raw PMU - event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + Select the PMU event. Selection can be: + + - a symbolic event name (use 'perf list' to list all events) + + - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. + + - a symbolically formed event like 'pmu/param1=0x3,param2/' where + param1 and param2 are defined as formats for the PMU in + /sys/bus/event_sources/devices/<pmu>/format/* + + - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/' + where M, N, K are numbers (in decimal, hex, octal format). + Acceptable values for each of 'config', 'config1' and 'config2' + parameters are defined by corresponding entries in + /sys/bus/event_sources/devices/<pmu>/format/* -i:: --no-inherit:: diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 71f2844cf97f..7ed22ff1e1ac 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -68,4 +68,17 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak val, opflags); } +#ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP +#include <pthread.h> +static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr, + size_t cpusetsize, + cpu_set_t *cpuset) +{ + attr = attr; + cpusetsize = cpusetsize; + cpuset = cpuset; + return 0; +} +#endif + #endif /* _FUTEX_H */ diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 77d5cae54c6a..50e6b66aea1f 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -236,10 +236,10 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) if (errno == ENOENT) return false; - pr_warning("Problems with %s file, consider removing it from the cache\n", + pr_warning("Problems with %s file, consider removing it from the cache\n", filename); } else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) { - pr_warning("Problems with %s file, consider removing it from the cache\n", + pr_warning("Problems with %s file, consider removing it from the cache\n", filename); } diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 1fd96c13f199..74aada554b12 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -390,6 +390,15 @@ static void perf_evlist__collapse_resort(struct perf_evlist *evlist) } } +static struct data__file *fmt_to_data_file(struct perf_hpp_fmt *fmt) +{ + struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt); + void *ptr = dfmt - dfmt->idx; + struct data__file *d = container_of(ptr, struct data__file, fmt); + + return d; +} + static struct hist_entry* get_pair_data(struct hist_entry *he, struct data__file *d) { @@ -407,8 +416,7 @@ get_pair_data(struct hist_entry *he, struct data__file *d) static struct hist_entry* get_pair_fmt(struct hist_entry *he, struct diff_hpp_fmt *dfmt) { - void *ptr = dfmt - dfmt->idx; - struct data__file *d = container_of(ptr, struct data__file, fmt); + struct data__file *d = fmt_to_data_file(&dfmt->fmt); return get_pair_data(he, d); } @@ -430,7 +438,7 @@ static void hists__baseline_only(struct hists *hists) next = rb_next(&he->rb_node_in); if (!hist_entry__next_pair(he)) { rb_erase(&he->rb_node_in, root); - hist_entry__free(he); + hist_entry__delete(he); } } } @@ -448,26 +456,30 @@ static void hists__precompute(struct hists *hists) next = rb_first(root); while (next != NULL) { struct hist_entry *he, *pair; + struct data__file *d; + int i; he = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&he->rb_node_in); - pair = get_pair_data(he, &data__files[sort_compute]); - if (!pair) - continue; + data__for_each_file_new(i, d) { + pair = get_pair_data(he, d); + if (!pair) + continue; - switch (compute) { - case COMPUTE_DELTA: - compute_delta(he, pair); - break; - case COMPUTE_RATIO: - compute_ratio(he, pair); - break; - case COMPUTE_WEIGHTED_DIFF: - compute_wdiff(he, pair); - break; - default: - BUG_ON(1); + switch (compute) { + case COMPUTE_DELTA: + compute_delta(he, pair); + break; + case COMPUTE_RATIO: + compute_ratio(he, pair); + break; + case COMPUTE_WEIGHTED_DIFF: + compute_wdiff(he, pair); + break; + default: + BUG_ON(1); + } } } } @@ -517,7 +529,7 @@ __hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, static int64_t hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, - int c) + int c, int sort_idx) { bool pairs_left = hist_entry__has_pairs(left); bool pairs_right = hist_entry__has_pairs(right); @@ -529,8 +541,8 @@ hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, if (!pairs_left || !pairs_right) return pairs_left ? -1 : 1; - p_left = get_pair_data(left, &data__files[sort_compute]); - p_right = get_pair_data(right, &data__files[sort_compute]); + p_left = get_pair_data(left, &data__files[sort_idx]); + p_right = get_pair_data(right, &data__files[sort_idx]); if (!p_left && !p_right) return 0; @@ -546,90 +558,102 @@ hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, } static int64_t -hist_entry__cmp_nop(struct hist_entry *left __maybe_unused, +hist_entry__cmp_compute_idx(struct hist_entry *left, struct hist_entry *right, + int c, int sort_idx) +{ + struct hist_entry *p_right, *p_left; + + p_left = get_pair_data(left, &data__files[sort_idx]); + p_right = get_pair_data(right, &data__files[sort_idx]); + + if (!p_left && !p_right) + return 0; + + if (!p_left || !p_right) + return p_left ? -1 : 1; + + if (c != COMPUTE_DELTA) { + /* + * The delta can be computed without the baseline, but + * others are not. Put those entries which have no + * values below. + */ + if (left->dummy && right->dummy) + return 0; + + if (left->dummy || right->dummy) + return left->dummy ? 1 : -1; + } + + return __hist_entry__cmp_compute(p_left, p_right, c); +} + +static int64_t +hist_entry__cmp_nop(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left __maybe_unused, struct hist_entry *right __maybe_unused) { return 0; } static int64_t -hist_entry__cmp_baseline(struct hist_entry *left, struct hist_entry *right) +hist_entry__cmp_baseline(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) { - if (sort_compute) - return 0; - if (left->stat.period == right->stat.period) return 0; return left->stat.period > right->stat.period ? 1 : -1; } static int64_t -hist_entry__cmp_delta(struct hist_entry *left, struct hist_entry *right) +hist_entry__cmp_delta(struct perf_hpp_fmt *fmt, + struct hist_entry *left, struct hist_entry *right) { - return hist_entry__cmp_compute(right, left, COMPUTE_DELTA); + struct data__file *d = fmt_to_data_file(fmt); + + return hist_entry__cmp_compute(right, left, COMPUTE_DELTA, d->idx); } static int64_t -hist_entry__cmp_ratio(struct hist_entry *left, struct hist_entry *right) +hist_entry__cmp_ratio(struct perf_hpp_fmt *fmt, + struct hist_entry *left, struct hist_entry *right) { - return hist_entry__cmp_compute(right, left, COMPUTE_RATIO); + struct data__file *d = fmt_to_data_file(fmt); + + return hist_entry__cmp_compute(right, left, COMPUTE_RATIO, d->idx); } static int64_t -hist_entry__cmp_wdiff(struct hist_entry *left, struct hist_entry *right) +hist_entry__cmp_wdiff(struct perf_hpp_fmt *fmt, + struct hist_entry *left, struct hist_entry *right) { - return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF); + struct data__file *d = fmt_to_data_file(fmt); + + return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF, d->idx); } -static void insert_hist_entry_by_compute(struct rb_root *root, - struct hist_entry *he, - int c) +static int64_t +hist_entry__cmp_delta_idx(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) { - struct rb_node **p = &root->rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - if (hist_entry__cmp_compute(he, iter, c) < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, root); + return hist_entry__cmp_compute_idx(right, left, COMPUTE_DELTA, + sort_compute); } -static void hists__compute_resort(struct hists *hists) +static int64_t +hist_entry__cmp_ratio_idx(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) { - struct rb_root *root; - struct rb_node *next; - - if (sort__need_collapse) - root = &hists->entries_collapsed; - else - root = hists->entries_in; - - hists->entries = RB_ROOT; - next = rb_first(root); - - hists__reset_stats(hists); - hists__reset_col_len(hists); - - while (next != NULL) { - struct hist_entry *he; - - he = rb_entry(next, struct hist_entry, rb_node_in); - next = rb_next(&he->rb_node_in); - - insert_hist_entry_by_compute(&hists->entries, he, compute); - hists__inc_stats(hists, he); + return hist_entry__cmp_compute_idx(right, left, COMPUTE_RATIO, + sort_compute); +} - if (!he->filtered) - hists__calc_col_len(hists, he); - } +static int64_t +hist_entry__cmp_wdiff_idx(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute_idx(right, left, COMPUTE_WEIGHTED_DIFF, + sort_compute); } static void hists__process(struct hists *hists) @@ -637,12 +661,8 @@ static void hists__process(struct hists *hists) if (show_baseline_only) hists__baseline_only(hists); - if (sort_compute) { - hists__precompute(hists); - hists__compute_resort(hists); - } else { - hists__output_resort(hists, NULL); - } + hists__precompute(hists); + hists__output_resort(hists, NULL); hists__fprintf(hists, true, 0, 0, 0, stdout); } @@ -841,7 +861,7 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, char pfmt[20] = " "; if (!pair) - goto dummy_print; + goto no_print; switch (comparison_method) { case COMPUTE_DELTA: @@ -850,8 +870,6 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, else diff = compute_delta(he, pair); - if (fabs(diff) < 0.01) - goto dummy_print; scnprintf(pfmt, 20, "%%%+d.2f%%%%", dfmt->header_width - 1); return percent_color_snprintf(hpp->buf, hpp->size, pfmt, diff); @@ -883,6 +901,9 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, } dummy_print: return scnprintf(hpp->buf, hpp->size, "%*s", + dfmt->header_width, "N/A"); +no_print: + return scnprintf(hpp->buf, hpp->size, "%*s", dfmt->header_width, pfmt); } @@ -932,14 +953,15 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair, else diff = compute_delta(he, pair); - if (fabs(diff) >= 0.01) - scnprintf(buf, size, "%+4.2F%%", diff); + scnprintf(buf, size, "%+4.2F%%", diff); break; case PERF_HPP_DIFF__RATIO: /* No point for ratio number if we are dummy.. */ - if (he->dummy) + if (he->dummy) { + scnprintf(buf, size, "N/A"); break; + } if (pair->diff.computed) ratio = pair->diff.period_ratio; @@ -952,8 +974,10 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair, case PERF_HPP_DIFF__WEIGHTED_DIFF: /* No point for wdiff number if we are dummy.. */ - if (he->dummy) + if (he->dummy) { + scnprintf(buf, size, "N/A"); break; + } if (pair->diff.computed) wdiff = pair->diff.wdiff; @@ -1105,9 +1129,10 @@ static void data__hpp_register(struct data__file *d, int idx) perf_hpp__register_sort_field(fmt); } -static void ui_init(void) +static int ui_init(void) { struct data__file *d; + struct perf_hpp_fmt *fmt; int i; data__for_each_file(i, d) { @@ -1137,6 +1162,46 @@ static void ui_init(void) data__hpp_register(d, i ? PERF_HPP_DIFF__PERIOD : PERF_HPP_DIFF__PERIOD_BASELINE); } + + if (!sort_compute) + return 0; + + /* + * Prepend an fmt to sort on columns at 'sort_compute' first. + * This fmt is added only to the sort list but not to the + * output fields list. + * + * Note that this column (data) can be compared twice - one + * for this 'sort_compute' fmt and another for the normal + * diff_hpp_fmt. But it shouldn't a problem as most entries + * will be sorted out by first try or baseline and comparing + * is not a costly operation. + */ + fmt = zalloc(sizeof(*fmt)); + if (fmt == NULL) { + pr_err("Memory allocation failed\n"); + return -1; + } + + fmt->cmp = hist_entry__cmp_nop; + fmt->collapse = hist_entry__cmp_nop; + + switch (compute) { + case COMPUTE_DELTA: + fmt->sort = hist_entry__cmp_delta_idx; + break; + case COMPUTE_RATIO: + fmt->sort = hist_entry__cmp_ratio_idx; + break; + case COMPUTE_WEIGHTED_DIFF: + fmt->sort = hist_entry__cmp_wdiff_idx; + break; + default: + BUG_ON(1); + } + + list_add(&fmt->sort_list, &perf_hpp__sort_list); + return 0; } static int data_init(int argc, const char **argv) @@ -1202,7 +1267,8 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) if (data_init(argc, argv) < 0) return -1; - ui_init(); + if (ui_init() < 0) + return -1; sort__mode = SORT_MODE__DIFF; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 84df2deed988..a13641e066f5 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -343,6 +343,7 @@ static int __cmd_inject(struct perf_inject *inject) int ret = -EINVAL; struct perf_session *session = inject->session; struct perf_data_file *file_out = &inject->output; + int fd = perf_data_file__fd(file_out); signal(SIGINT, sig_handler); @@ -376,7 +377,7 @@ static int __cmd_inject(struct perf_inject *inject) } if (!file_out->is_pipe) - lseek(file_out->fd, session->header.data_offset, SEEK_SET); + lseek(fd, session->header.data_offset, SEEK_SET); ret = perf_session__process_events(session, &inject->tool); @@ -385,7 +386,7 @@ static int __cmd_inject(struct perf_inject *inject) perf_header__set_feat(&session->header, HEADER_BUILD_ID); session->header.data_size = inject->bytes_written; - perf_session__write_header(session, session->evlist, file_out->fd, true); + perf_session__write_header(session, session->evlist, fd, true); } return ret; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 24db6ffe2957..9b5663950a4d 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -7,44 +7,47 @@ #include "util/session.h" #include "util/data.h" -#define MEM_OPERATION_LOAD "load" -#define MEM_OPERATION_STORE "store" - -static const char *mem_operation = MEM_OPERATION_LOAD; +#define MEM_OPERATION_LOAD 0x1 +#define MEM_OPERATION_STORE 0x2 struct perf_mem { struct perf_tool tool; char const *input_name; bool hide_unresolved; bool dump_raw; + int operation; const char *cpu_list; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; -static int __cmd_record(int argc, const char **argv) +static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) { int rec_argc, i = 0, j; const char **rec_argv; - char event[64]; int ret; - rec_argc = argc + 4; + rec_argc = argc + 7; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (!rec_argv) return -1; - rec_argv[i++] = strdup("record"); - if (!strcmp(mem_operation, MEM_OPERATION_LOAD)) - rec_argv[i++] = strdup("-W"); - rec_argv[i++] = strdup("-d"); - rec_argv[i++] = strdup("-e"); + rec_argv[i++] = "record"; - if (strcmp(mem_operation, MEM_OPERATION_LOAD)) - sprintf(event, "cpu/mem-stores/pp"); - else - sprintf(event, "cpu/mem-loads/pp"); + if (mem->operation & MEM_OPERATION_LOAD) + rec_argv[i++] = "-W"; + + rec_argv[i++] = "-d"; + + if (mem->operation & MEM_OPERATION_LOAD) { + rec_argv[i++] = "-e"; + rec_argv[i++] = "cpu/mem-loads/pp"; + } + + if (mem->operation & MEM_OPERATION_STORE) { + rec_argv[i++] = "-e"; + rec_argv[i++] = "cpu/mem-stores/pp"; + } - rec_argv[i++] = strdup(event); for (j = 1; j < argc; j++, i++) rec_argv[i] = argv[j]; @@ -162,17 +165,17 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) if (!rep_argv) return -1; - rep_argv[i++] = strdup("report"); - rep_argv[i++] = strdup("--mem-mode"); - rep_argv[i++] = strdup("-n"); /* display number of samples */ + rep_argv[i++] = "report"; + rep_argv[i++] = "--mem-mode"; + rep_argv[i++] = "-n"; /* display number of samples */ /* * there is no weight (cost) associated with stores, so don't print * the column */ - if (strcmp(mem_operation, MEM_OPERATION_LOAD)) - rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr," - "dso_daddr,tlb,locked"); + if (!(mem->operation & MEM_OPERATION_LOAD)) + rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," + "dso_daddr,tlb,locked"; for (j = 1; j < argc; j++, i++) rep_argv[i] = argv[j]; @@ -182,6 +185,75 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) return ret; } +struct mem_mode { + const char *name; + int mode; +}; + +#define MEM_OPT(n, m) \ + { .name = n, .mode = (m) } + +#define MEM_END { .name = NULL } + +static const struct mem_mode mem_modes[]={ + MEM_OPT("load", MEM_OPERATION_LOAD), + MEM_OPT("store", MEM_OPERATION_STORE), + MEM_END +}; + +static int +parse_mem_ops(const struct option *opt, const char *str, int unset) +{ + int *mode = (int *)opt->value; + const struct mem_mode *m; + char *s, *os = NULL, *p; + int ret = -1; + + if (unset) + return 0; + + /* str may be NULL in case no arg is passed to -t */ + if (str) { + /* because str is read-only */ + s = os = strdup(str); + if (!s) + return -1; + + /* reset mode */ + *mode = 0; + + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; + + for (m = mem_modes; m->name; m++) { + if (!strcasecmp(s, m->name)) + break; + } + if (!m->name) { + fprintf(stderr, "unknown sampling op %s," + " check man page\n", s); + goto error; + } + + *mode |= m->mode; + + if (!p) + break; + + s = p + 1; + } + } + ret = 0; + + if (*mode == 0) + *mode = MEM_OPERATION_LOAD; +error: + free(os); + return ret; +} + int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) { struct stat st; @@ -197,10 +269,15 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) .ordered_events = true, }, .input_name = "perf.data", + /* + * default to both load an store sampling + */ + .operation = MEM_OPERATION_LOAD | MEM_OPERATION_STORE, }; const struct option mem_options[] = { - OPT_STRING('t', "type", &mem_operation, - "type", "memory operations(load/store)"), + OPT_CALLBACK('t', "type", &mem.operation, + "type", "memory operations(load,store) Default load,store", + parse_mem_ops), OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw, "dump raw samples in ASCII"), OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved, @@ -225,7 +302,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation)) + if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation)) usage_with_options(mem_usage, mem_options); if (!mem.input_name || !strlen(mem.input_name)) { @@ -236,7 +313,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) } if (!strncmp(argv[0], "rec", 3)) - return __cmd_record(argc, argv); + return __cmd_record(argc, argv, &mem); else if (!strncmp(argv[0], "rep", 3)) return report_events(argc, argv, &mem); else diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 8648c6d3003d..404ab3434052 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -190,16 +190,30 @@ out: return rc; } +static int process_sample_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct record *rec = container_of(tool, struct record, tool); + + rec->samples++; + + return build_id__mark_dso_hit(tool, event, sample, evsel, machine); +} + static int process_buildids(struct record *rec) { struct perf_data_file *file = &rec->file; struct perf_session *session = rec->session; - u64 start = session->header.data_offset; - u64 size = lseek(file->fd, 0, SEEK_CUR); + u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); if (size == 0) return 0; + file->size = size; + /* * During this process, it'll load kernel map and replace the * dso->long_name to a real pathname it found. In this case @@ -211,9 +225,7 @@ static int process_buildids(struct record *rec) */ symbol_conf.ignore_vmlinux_buildid = true; - return __perf_session__process_events(session, start, - size - start, - size, &build_id__mark_dso_hit_ops); + return perf_session__process_events(session, &rec->tool); } static void perf_event__synthesize_guest_os(struct machine *machine, void *data) @@ -322,6 +334,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) struct perf_data_file *file = &rec->file; struct perf_session *session; bool disabled = false, draining = false; + int fd; rec->progname = argv[0]; @@ -336,6 +349,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) return -1; } + fd = perf_data_file__fd(file); rec->session = session; record__init_features(rec); @@ -360,12 +374,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); if (file->is_pipe) { - err = perf_header__write_pipe(file->fd); + err = perf_header__write_pipe(fd); if (err < 0) goto out_child; } else { - err = perf_session__write_header(session, rec->evlist, - file->fd, false); + err = perf_session__write_header(session, rec->evlist, fd, false); if (err < 0) goto out_child; } @@ -397,7 +410,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * return this more properly and also * propagate errors that now are calling die() */ - err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist, + err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, process_synthesized_event); if (err <= 0) { pr_err("Couldn't record tracing data.\n"); @@ -504,19 +517,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } - if (!quiet) { + if (!quiet) fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); - /* - * Approximate RIP event size: 24 bytes. - */ - fprintf(stderr, - "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n", - (double)rec->bytes_written / 1024.0 / 1024.0, - file->path, - rec->bytes_written / 24); - } - out_child: if (forks) { int exit_status; @@ -535,13 +538,29 @@ out_child: } else status = err; + /* this will be recalculated during process_buildids() */ + rec->samples = 0; + if (!err && !file->is_pipe) { rec->session->header.data_size += rec->bytes_written; if (!rec->no_buildid) process_buildids(rec); - perf_session__write_header(rec->session, rec->evlist, - file->fd, true); + perf_session__write_header(rec->session, rec->evlist, fd, true); + } + + if (!err && !quiet) { + char samples[128]; + + if (rec->samples) + scnprintf(samples, sizeof(samples), + " (%" PRIu64 " samples)", rec->samples); + else + samples[0] = '\0'; + + fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n", + perf_data_file__size(file) / 1024.0 / 1024.0, + file->path, samples); } out_delete_session: @@ -720,6 +739,13 @@ static struct record record = { .default_per_cpu = true, }, }, + .tool = { + .sample = process_sample_event, + .fork = perf_event__process_fork, + .comm = perf_event__process_comm, + .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, + }, }; #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 072ae8ad67fc..2f91094e228b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -86,17 +86,6 @@ static int report__config(const char *var, const char *value, void *cb) return perf_default_config(var, value, cb); } -static void report__inc_stats(struct report *rep, struct hist_entry *he) -{ - /* - * The @he is either of a newly created one or an existing one - * merging current sample. We only want to count a new one so - * checking ->nr_events being 1. - */ - if (he->stat.nr_events == 1) - rep->nr_entries++; -} - static int hist_iter__report_callback(struct hist_entry_iter *iter, struct addr_location *al, bool single, void *arg) @@ -108,8 +97,6 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, struct mem_info *mi; struct branch_info *bi; - report__inc_stats(rep, he); - if (!ui__has_annotation()) return 0; @@ -499,6 +486,9 @@ static int __cmd_report(struct report *rep) report__warn_kptr_restrict(rep); + evlist__for_each(session->evlist, pos) + rep->nr_entries += evsel__hists(pos)->nr_entries; + if (use_browser == 0) { if (verbose > 3) perf_session__fprintf(session, stdout); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 891086376381..e598e4e98170 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1730,7 +1730,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "detailed run - start a lot of events"), OPT_BOOLEAN('S', "sync", &sync_run, "call sync() before starting a run"), - OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, + OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, "print large numbers with thousands\' separators", stat__set_big_num), OPT_STRING('C', "cpu", &target.cpu_list, "cpu", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 616f0fcb4701..c4c7eac69de4 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -165,7 +165,7 @@ static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip) err ? "[unknown]" : uts.release, perf_version_string); if (use_browser <= 0) sleep(5); - + map->erange_warned = true; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index badfabc6a01f..7e935f1083ec 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -929,66 +929,66 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, { .name = "close", .errmsg = true, - .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, { .name = "connect", .errmsg = true, }, { .name = "dup", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "dup2", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "dup3", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, { .name = "eventfd2", .errmsg = true, .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, { .name = "faccessat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "fadvise64", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fallocate", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchdir", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchmod", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchmodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "fchown", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchownat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "fcntl", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [1] = SCA_STRARRAY, /* cmd */ }, .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, { .name = "fdatasync", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "flock", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [1] = SCA_FLOCK, /* cmd */ }, }, { .name = "fsetxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fstat", .errmsg = true, .alias = "newfstat", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fstatat", .errmsg = true, .alias = "newfstatat", - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "fstatfs", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fsync", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "ftruncate", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "futex", .errmsg = true, .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, { .name = "futimesat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "getdents", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "getdents64", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "ioctl", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ + .arg_scnprintf = { [0] = SCA_FD, /* fd */ #if defined(__i386__) || defined(__x86_64__) /* * FIXME: Make this available to all arches. @@ -1002,7 +1002,7 @@ static struct syscall_fmt { { .name = "kill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, { .name = "linkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "lseek", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [2] = SCA_STRARRAY, /* whence */ }, @@ -1012,9 +1012,9 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_HEX, /* start */ [2] = SCA_MADV_BHV, /* behavior */ }, }, { .name = "mkdirat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "mknodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "mlock", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "mlockall", .errmsg = true, @@ -1036,9 +1036,9 @@ static struct syscall_fmt { { .name = "munmap", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "name_to_handle_at", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "newfstatat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "open", .errmsg = true, .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "open_by_handle_at", .errmsg = true, @@ -1052,20 +1052,20 @@ static struct syscall_fmt { { .name = "poll", .errmsg = true, .timeout = true, }, { .name = "ppoll", .errmsg = true, .timeout = true, }, { .name = "pread", .errmsg = true, .alias = "pread64", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "preadv", .errmsg = true, .alias = "pread", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, { .name = "pwrite", .errmsg = true, .alias = "pwrite64", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "pwritev", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "read", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "readlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "readv", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "recvfrom", .errmsg = true, .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmmsg", .errmsg = true, @@ -1073,7 +1073,7 @@ static struct syscall_fmt { { .name = "recvmsg", .errmsg = true, .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "renameat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "rt_sigaction", .errmsg = true, .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, @@ -1091,7 +1091,7 @@ static struct syscall_fmt { { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "shutdown", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "socket", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ [1] = SCA_SK_TYPE, /* type */ }, @@ -1102,7 +1102,7 @@ static struct syscall_fmt { .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, { .name = "symlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "tgkill", .errmsg = true, .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, { .name = "tkill", .errmsg = true, @@ -1113,9 +1113,9 @@ static struct syscall_fmt { { .name = "utimensat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, { .name = "write", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "writev", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, }; static int syscall_fmt__cmp(const void *name, const void *fmtp) @@ -1191,7 +1191,7 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) if (thread__priv(thread) == NULL) thread__set_priv(thread, thread_trace__new()); - + if (thread__priv(thread) == NULL) goto fail; @@ -2056,23 +2056,24 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (trace->trace_syscalls && perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit)) - goto out_error_tp; + goto out_error_raw_syscalls; if (trace->trace_syscalls) perf_evlist__add_vfs_getname(evlist); if ((trace->trace_pgfaults & TRACE_PFMAJ) && - perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) - goto out_error_tp; + perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { + goto out_error_mem; + } if ((trace->trace_pgfaults & TRACE_PFMIN) && perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN)) - goto out_error_tp; + goto out_error_mem; if (trace->sched && - perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", - trace__sched_stat_runtime)) - goto out_error_tp; + perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", + trace__sched_stat_runtime)) + goto out_error_sched_stat_runtime; err = perf_evlist__create_maps(evlist, &trace->opts.target); if (err < 0) { @@ -2202,8 +2203,12 @@ out: { char errbuf[BUFSIZ]; -out_error_tp: - perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf)); +out_error_sched_stat_runtime: + debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); + goto out_error; + +out_error_raw_syscalls: + debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); goto out_error; out_error_mmap: @@ -2217,6 +2222,9 @@ out_error: fprintf(trace->output, "%s\n", errbuf); goto out_delete_evlist; } +out_error_mem: + fprintf(trace->output, "Not enough memory to run!\n"); + goto out_delete_evlist; } static int trace__replay(struct trace *trace) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 648e31ff4021..cc224080b525 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -198,6 +198,7 @@ CORE_FEATURE_TESTS = \ libpython-version \ libslang \ libunwind \ + pthread-attr-setaffinity-np \ stackprotector-all \ timerfd \ libdw-dwarf-unwind \ @@ -226,6 +227,7 @@ VF_FEATURE_TESTS = \ libelf-getphdrnum \ libelf-mmap \ libpython-version \ + pthread-attr-setaffinity-np \ stackprotector-all \ timerfd \ libunwind-debug-frame \ @@ -301,6 +303,10 @@ ifeq ($(feature-sync-compare-and-swap), 1) CFLAGS += -DHAVE_SYNC_COMPARE_AND_SWAP_SUPPORT endif +ifeq ($(feature-pthread-attr-setaffinity-np), 1) + CFLAGS += -DHAVE_PTHREAD_ATTR_SETAFFINITY_NP +endif + ifndef NO_BIONIC $(call feature_check,bionic) ifeq ($(feature-bionic), 1) diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 53f19b5dbc37..42ac05aaf8ac 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -25,6 +25,7 @@ FILES= \ test-libslang.bin \ test-libunwind.bin \ test-libunwind-debug-frame.bin \ + test-pthread-attr-setaffinity-np.bin \ test-stackprotector-all.bin \ test-timerfd.bin \ test-libdw-dwarf-unwind.bin \ @@ -47,6 +48,9 @@ test-all.bin: test-hello.bin: $(BUILD) +test-pthread-attr-setaffinity-np.bin: + $(BUILD) -Werror -lpthread + test-stackprotector-all.bin: $(BUILD) -Werror -fstack-protector-all diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c index 652e0098eba6..6d4d09323922 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/perf/config/feature-checks/test-all.c @@ -97,6 +97,10 @@ # include "test-zlib.c" #undef main +#define main main_test_pthread_attr_setaffinity_np +# include "test-pthread_attr_setaffinity_np.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -121,6 +125,7 @@ int main(int argc, char *argv[]) main_test_libdw_dwarf_unwind(); main_test_sync_compare_and_swap(argc, argv); main_test_zlib(); + main_test_pthread_attr_setaffinity_np(); return 0; } diff --git a/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c b/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c new file mode 100644 index 000000000000..0a0d3ecb4e8a --- /dev/null +++ b/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c @@ -0,0 +1,14 @@ +#include <stdint.h> +#include <pthread.h> + +int main(void) +{ + int ret = 0; + pthread_attr_t thread_attr; + + pthread_attr_init(&thread_attr); + /* don't care abt exact args, just the API itself in libpthread */ + ret = pthread_attr_setaffinity_np(&thread_attr, 0, NULL); + + return ret; +} diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index c9b4b6269b51..1091bd47adfd 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -104,7 +104,6 @@ class Event(dict): continue if not self.compare_data(self[t], other[t]): log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) - # Test file description needs to have following sections: # [config] diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 8d110dec393e..18619966454c 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -140,7 +140,7 @@ static void del_hist_entries(struct hists *hists) he = rb_entry(node, struct hist_entry, rb_node); rb_erase(node, root_out); rb_erase(&he->rb_node_in, root_in); - hist_entry__free(he); + hist_entry__delete(he); } } diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index f5547610da02..b52c9faea224 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -106,7 +106,7 @@ static void del_hist_entries(struct hists *hists) he = rb_entry(node, struct hist_entry, rb_node); rb_erase(node, root_out); rb_erase(&he->rb_node_in, root_in); - hist_entry__free(he); + hist_entry__delete(he); } } diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 69a71ff84e01..75709d2b17b4 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -222,7 +222,6 @@ tarpkg: @cmd="$(PERF)/tests/perf-targz-src-pkg $(PERF)"; \ echo "- $@: $$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1 - all: $(run) $(run_O) tarpkg @echo OK diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 7f2f51f93619..1cdab0ce00e2 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1145,6 +1145,49 @@ static int test__pinned_group(struct perf_evlist *evlist) return 0; } +static int test__checkevent_breakpoint_len(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); + TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) == + evsel->attr.bp_type); + TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_1 == + evsel->attr.bp_len); + + return 0; +} + +static int test__checkevent_breakpoint_len_w(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); + TEST_ASSERT_VAL("wrong bp_type", HW_BREAKPOINT_W == + evsel->attr.bp_type); + TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_2 == + evsel->attr.bp_len); + + return 0; +} + +static int +test__checkevent_breakpoint_len_rw_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + + return test__checkevent_breakpoint_rw(evlist); +} + static int count_tracepoints(void) { char events_path[PATH_MAX]; @@ -1420,6 +1463,21 @@ static struct evlist_test test__events[] = { .check = test__pinned_group, .id = 41, }, + { + .name = "mem:0/1", + .check = test__checkevent_breakpoint_len, + .id = 42, + }, + { + .name = "mem:0/2:w", + .check = test__checkevent_breakpoint_len_w, + .id = 43, + }, + { + .name = "mem:0/4:rw:u", + .check = test__checkevent_breakpoint_len_rw_modifier, + .id = 44 + }, #if defined(__s390x__) { .name = "kvm-s390:kvm_s390_create_vm", @@ -1471,7 +1529,7 @@ static int test_event(struct evlist_test *e) } else { ret = e->check(evlist); } - + perf_evlist__delete(evlist); return ret; diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 4908c648a597..30c02181e78b 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -110,7 +110,7 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_STACK_USER) { COMP(user_stack.size); - if (memcmp(s1->user_stack.data, s1->user_stack.data, + if (memcmp(s1->user_stack.data, s2->user_stack.data, s1->user_stack.size)) { pr_debug("Samples differ at 'user_stack'\n"); return false; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 1e0a2fd80115..9d32e3c0cfee 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -517,7 +517,7 @@ static bool annotate_browser__jump(struct annotate_browser *browser) } annotate_browser__set_top(browser, dl, idx); - + return true; } @@ -867,7 +867,6 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser ++browser->nr_jumps; } - } static inline int width_jumps(int n) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 482adae3cc44..25d608394d74 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -285,7 +285,8 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \ } #define __HPP_SORT_FN(_type, _field) \ -static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct hist_entry *a, struct hist_entry *b) \ { \ return __hpp__sort(a, b, he_get_##_field); \ } @@ -312,7 +313,8 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \ } #define __HPP_SORT_ACC_FN(_type, _field) \ -static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct hist_entry *a, struct hist_entry *b) \ { \ return __hpp__sort_acc(a, b, he_get_acc_##_field); \ } @@ -331,7 +333,8 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \ } #define __HPP_SORT_RAW_FN(_type, _field) \ -static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct hist_entry *a, struct hist_entry *b) \ { \ return __hpp__sort(a, b, he_get_raw_##_field); \ } @@ -361,7 +364,8 @@ HPP_PERCENT_ACC_FNS(overhead_acc, period) HPP_RAW_FNS(samples, nr_events) HPP_RAW_FNS(period, period) -static int64_t hpp__nop_cmp(struct hist_entry *a __maybe_unused, +static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *a __maybe_unused, struct hist_entry *b __maybe_unused) { return 0; diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h index f34f89eb607c..717d39d3052b 100644 --- a/tools/perf/ui/progress.h +++ b/tools/perf/ui/progress.h @@ -4,12 +4,12 @@ #include <linux/types.h> void ui_progress__finish(void); - + struct ui_progress { const char *title; u64 curr, next, step, total; }; - + void ui_progress__init(struct ui_progress *p, u64 total, const char *title); void ui_progress__update(struct ui_progress *p, u64 adv); diff --git a/tools/perf/ui/tui/helpline.c b/tools/perf/ui/tui/helpline.c index 1c8b9afd5d6e..88f5143a5981 100644 --- a/tools/perf/ui/tui/helpline.c +++ b/tools/perf/ui/tui/helpline.c @@ -9,6 +9,7 @@ #include "../libslang.h" char ui_helpline__last_msg[1024]; +bool tui_helpline__set; static void tui_helpline__pop(void) { @@ -35,6 +36,8 @@ static int tui_helpline__show(const char *format, va_list ap) sizeof(ui_helpline__last_msg) - backlog, format, ap); backlog += ret; + tui_helpline__set = true; + if (ui_helpline__last_msg[backlog - 1] == '\n') { ui_helpline__puts(ui_helpline__last_msg); SLsmg_refresh(); diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 3c38f25b1695..b77e1d771363 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -17,6 +17,7 @@ static volatile int ui__need_resize; extern struct perf_error_ops perf_tui_eops; +extern bool tui_helpline__set; extern void hist_browser__init_hpp(void); @@ -159,7 +160,7 @@ out: void ui__exit(bool wait_for_ok) { - if (wait_for_ok) + if (wait_for_ok && tui_helpline__set) ui__question_window("Fatal Error", ui_helpline__last_msg, "Press any key...", 0); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 01bc4e23a2cf..61bf9128e1f2 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -239,7 +239,7 @@ static int mov__parse(struct ins_operands *ops) *s = '\0'; ops->source.raw = strdup(ops->raw); *s = ','; - + if (ops->source.raw == NULL) return -1; diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index f4654183d391..55355b3d4f85 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -5,132 +5,6 @@ int perf_use_color_default = -1; -static int parse_color(const char *name, int len) -{ - static const char * const color_names[] = { - "normal", "black", "red", "green", "yellow", - "blue", "magenta", "cyan", "white" - }; - char *end; - int i; - - for (i = 0; i < (int)ARRAY_SIZE(color_names); i++) { - const char *str = color_names[i]; - if (!strncasecmp(name, str, len) && !str[len]) - return i - 1; - } - i = strtol(name, &end, 10); - if (end - name == len && i >= -1 && i <= 255) - return i; - return -2; -} - -static int parse_attr(const char *name, int len) -{ - static const int attr_values[] = { 1, 2, 4, 5, 7 }; - static const char * const attr_names[] = { - "bold", "dim", "ul", "blink", "reverse" - }; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(attr_names); i++) { - const char *str = attr_names[i]; - if (!strncasecmp(name, str, len) && !str[len]) - return attr_values[i]; - } - return -1; -} - -void color_parse(const char *value, const char *var, char *dst) -{ - color_parse_mem(value, strlen(value), var, dst); -} - -void color_parse_mem(const char *value, int value_len, const char *var, - char *dst) -{ - const char *ptr = value; - int len = value_len; - int attr = -1; - int fg = -2; - int bg = -2; - - if (!strncasecmp(value, "reset", len)) { - strcpy(dst, PERF_COLOR_RESET); - return; - } - - /* [fg [bg]] [attr] */ - while (len > 0) { - const char *word = ptr; - int val, wordlen = 0; - - while (len > 0 && !isspace(word[wordlen])) { - wordlen++; - len--; - } - - ptr = word + wordlen; - while (len > 0 && isspace(*ptr)) { - ptr++; - len--; - } - - val = parse_color(word, wordlen); - if (val >= -1) { - if (fg == -2) { - fg = val; - continue; - } - if (bg == -2) { - bg = val; - continue; - } - goto bad; - } - val = parse_attr(word, wordlen); - if (val < 0 || attr != -1) - goto bad; - attr = val; - } - - if (attr >= 0 || fg >= 0 || bg >= 0) { - int sep = 0; - - *dst++ = '\033'; - *dst++ = '['; - if (attr >= 0) { - *dst++ = '0' + attr; - sep++; - } - if (fg >= 0) { - if (sep++) - *dst++ = ';'; - if (fg < 8) { - *dst++ = '3'; - *dst++ = '0' + fg; - } else { - dst += sprintf(dst, "38;5;%d", fg); - } - } - if (bg >= 0) { - if (sep++) - *dst++ = ';'; - if (bg < 8) { - *dst++ = '4'; - *dst++ = '0' + bg; - } else { - dst += sprintf(dst, "48;5;%d", bg); - } - } - *dst++ = 'm'; - } - *dst = 0; - return; -bad: - die("bad color value '%.*s' for variable '%s'", value_len, value, var); -} - int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) { if (value) { diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 0a594b8a0c26..38146f922c54 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -30,8 +30,6 @@ extern int perf_use_color_default; int perf_color_default_config(const char *var, const char *value, void *cb); int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty); -void color_parse(const char *value, const char *var, char *dst); -void color_parse_mem(const char *value, int len, const char *var, char *dst); int color_vsnprintf(char *bf, size_t size, const char *color, const char *fmt, va_list args); int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 45be944d450a..c2f7d3b90966 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -532,12 +532,8 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size) break; cache_offset = offset & DSO__DATA_CACHE_MASK; - ret = -EINVAL; - if (-1 == lseek(dso->data.fd, cache_offset, SEEK_SET)) - break; - - ret = read(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE); + ret = pread(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE, cache_offset); if (ret <= 0) break; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 3782c82c6e44..ced92841ff97 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -139,6 +139,7 @@ struct dso { u32 status_seen; size_t file_size; struct list_head open_entry; + u64 frame_offset; } data; union { /* Tool specific area */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 2e507b5025a3..28b8ce86bf12 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1436,33 +1436,6 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) return printed + fprintf(fp, "\n"); } -int perf_evlist__strerror_tp(struct perf_evlist *evlist __maybe_unused, - int err, char *buf, size_t size) -{ - char sbuf[128]; - - switch (err) { - case ENOENT: - scnprintf(buf, size, "%s", - "Error:\tUnable to find debugfs\n" - "Hint:\tWas your kernel compiled with debugfs support?\n" - "Hint:\tIs the debugfs filesystem mounted?\n" - "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); - break; - case EACCES: - scnprintf(buf, size, - "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n" - "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", - debugfs_mountpoint, debugfs_mountpoint); - break; - default: - scnprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf))); - break; - } - - return 0; -} - int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, int err, char *buf, size_t size) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 0ba93f67ab94..c94a9e03ecf1 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -183,7 +183,6 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); -int perf_evlist__strerror_tp(struct perf_evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1e90c8557ede..ea51a90e20a0 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -709,6 +709,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (opts->sample_weight) perf_evsel__set_sample_bit(evsel, WEIGHT); + attr->task = track; attr->mmap = track; attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; @@ -797,6 +798,9 @@ int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads) int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) { + if (ncpus == 0 || nthreads == 0) + return 0; + if (evsel->system_wide) nthreads = 1; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b20e40c74468..1f407f7352a7 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2237,6 +2237,7 @@ static int check_magic_endian(u64 magic, uint64_t hdr_sz, * - unique number to identify actual perf.data files * - encode endianness of file */ + ph->version = PERF_HEADER_VERSION_2; /* check magic number with one endianness */ if (magic == __perf_magic2) @@ -2247,7 +2248,6 @@ static int check_magic_endian(u64 magic, uint64_t hdr_sz, return -1; ph->needs_swap = true; - ph->version = PERF_HEADER_VERSION_2; return 0; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 182395546ddc..70b48a65064c 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -241,6 +241,20 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) return he->stat.period == 0; } +static void hists__delete_entry(struct hists *hists, struct hist_entry *he) +{ + rb_erase(&he->rb_node, &hists->entries); + + if (sort__need_collapse) + rb_erase(&he->rb_node_in, &hists->entries_collapsed); + + --hists->nr_entries; + if (!he->filtered) + --hists->nr_non_filtered_entries; + + hist_entry__delete(he); +} + void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) { struct rb_node *next = rb_first(&hists->entries); @@ -258,16 +272,7 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) (zap_kernel && n->level != '.') || hists__decay_entry(hists, n)) && !n->used) { - rb_erase(&n->rb_node, &hists->entries); - - if (sort__need_collapse) - rb_erase(&n->rb_node_in, &hists->entries_collapsed); - - --hists->nr_entries; - if (!n->filtered) - --hists->nr_non_filtered_entries; - - hist_entry__free(n); + hists__delete_entry(hists, n); } } } @@ -281,16 +286,7 @@ void hists__delete_entries(struct hists *hists) n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - rb_erase(&n->rb_node, &hists->entries); - - if (sort__need_collapse) - rb_erase(&n->rb_node_in, &hists->entries_collapsed); - - --hists->nr_entries; - if (!n->filtered) - --hists->nr_non_filtered_entries; - - hist_entry__free(n); + hists__delete_entry(hists, n); } } @@ -433,6 +429,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists, if (!he) return NULL; + hists->nr_entries++; + rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, hists->entries_in); out: @@ -915,7 +913,7 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) if (perf_hpp__should_skip(fmt)) continue; - cmp = fmt->cmp(left, right); + cmp = fmt->cmp(fmt, left, right); if (cmp) break; } @@ -933,7 +931,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) if (perf_hpp__should_skip(fmt)) continue; - cmp = fmt->collapse(left, right); + cmp = fmt->collapse(fmt, left, right); if (cmp) break; } @@ -941,7 +939,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) return cmp; } -void hist_entry__free(struct hist_entry *he) +void hist_entry__delete(struct hist_entry *he) { zfree(&he->branch_info); zfree(&he->mem_info); @@ -981,7 +979,7 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, iter->callchain, he->callchain); } - hist_entry__free(he); + hist_entry__delete(he); return false; } @@ -1063,7 +1061,7 @@ static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b) if (perf_hpp__should_skip(fmt)) continue; - cmp = fmt->sort(a, b); + cmp = fmt->sort(fmt, a, b); if (cmp) break; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 46bd50344f85..2b690d028907 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -119,7 +119,7 @@ int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); -void hist_entry__free(struct hist_entry *); +void hist_entry__delete(struct hist_entry *he); void hists__output_resort(struct hists *hists, struct ui_progress *prog); void hists__collapse_resort(struct hists *hists, struct ui_progress *prog); @@ -195,9 +195,12 @@ struct perf_hpp_fmt { struct hist_entry *he); int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he); - int64_t (*cmp)(struct hist_entry *a, struct hist_entry *b); - int64_t (*collapse)(struct hist_entry *a, struct hist_entry *b); - int64_t (*sort)(struct hist_entry *a, struct hist_entry *b); + int64_t (*cmp)(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b); + int64_t (*collapse)(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b); + int64_t (*sort)(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b); struct list_head list; struct list_head sort_list; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 77b43fe43d55..7f8ec6ce2823 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -526,7 +526,7 @@ do { \ } int parse_events_add_breakpoint(struct list_head *list, int *idx, - void *ptr, char *type) + void *ptr, char *type, u64 len) { struct perf_event_attr attr; @@ -536,14 +536,15 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, if (parse_breakpoint_type(type, &attr)) return -EINVAL; - /* - * We should find a nice way to override the access length - * Provide some defaults for now - */ - if (attr.bp_type == HW_BREAKPOINT_X) - attr.bp_len = sizeof(long); - else - attr.bp_len = HW_BREAKPOINT_LEN_4; + /* Provide some defaults if len is not specified */ + if (!len) { + if (attr.bp_type == HW_BREAKPOINT_X) + len = sizeof(long); + else + len = HW_BREAKPOINT_LEN_4; + } + + attr.bp_len = len; attr.type = PERF_TYPE_BREAKPOINT; attr.sample_period = 1; @@ -1121,7 +1122,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, return; for_each_subsystem(sys_dir, sys_dirent, sys_next) { - if (subsys_glob != NULL && + if (subsys_glob != NULL && !strglobmatch(sys_dirent.d_name, subsys_glob)) continue; @@ -1132,7 +1133,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, continue; for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { - if (event_glob != NULL && + if (event_glob != NULL && !strglobmatch(evt_dirent.d_name, event_glob)) continue; @@ -1305,7 +1306,7 @@ static void print_symbol_events(const char *event_glob, unsigned type, for (i = 0; i < max; i++, syms++) { - if (event_glob != NULL && + if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) || (syms->alias && strglobmatch(syms->alias, event_glob)))) continue; @@ -1366,7 +1367,7 @@ void print_events(const char *event_glob, bool name_only) printf("\n"); printf(" %-50s [%s]\n", - "mem:<addr>[:access]", + "mem:<addr>[/len][:access]", event_type_descriptors[PERF_TYPE_BREAKPOINT]); printf("\n"); } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index db2cf78ff0f3..ff6e1fa4111e 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -71,6 +71,7 @@ struct parse_events_term { int type_val; int type_term; struct list_head list; + bool used; }; struct parse_events_evlist { @@ -104,7 +105,7 @@ int parse_events_add_numeric(struct list_head *list, int *idx, int parse_events_add_cache(struct list_head *list, int *idx, char *type, char *op_result1, char *op_result2); int parse_events_add_breakpoint(struct list_head *list, int *idx, - void *ptr, char *type); + void *ptr, char *type, u64 len); int parse_events_add_pmu(struct list_head *list, int *idx, char *pmu , struct list_head *head_config); enum perf_pmu_event_symbol_type diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 906630bbf8eb..94eacb6c1ef7 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -159,6 +159,7 @@ branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE <mem>{ {modifier_bp} { return str(yyscanner, PE_MODIFIER_BP); } : { return ':'; } +"/" { return '/'; } {num_dec} { return value(yyscanner, 10); } {num_hex} { return value(yyscanner, 16); } /* diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 93c4c9fbc922..72def077dbbf 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -326,6 +326,28 @@ PE_NAME_CACHE_TYPE } event_legacy_mem: +PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc +{ + struct parse_events_evlist *data = _data; + struct list_head *list; + + ALLOC_LIST(list); + ABORT_ON(parse_events_add_breakpoint(list, &data->idx, + (void *) $2, $6, $4)); + $$ = list; +} +| +PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc +{ + struct parse_events_evlist *data = _data; + struct list_head *list; + + ALLOC_LIST(list); + ABORT_ON(parse_events_add_breakpoint(list, &data->idx, + (void *) $2, NULL, $4)); + $$ = list; +} +| PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc { struct parse_events_evlist *data = _data; @@ -333,7 +355,7 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc ALLOC_LIST(list); ABORT_ON(parse_events_add_breakpoint(list, &data->idx, - (void *) $2, $4)); + (void *) $2, $4, 0)); $$ = list; } | @@ -344,7 +366,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc ALLOC_LIST(list); ABORT_ON(parse_events_add_breakpoint(list, &data->idx, - (void *) $2, NULL)); + (void *) $2, NULL, 0)); $$ = list; } diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index f62dee7bd924..4a015f77e2b5 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -46,7 +46,7 @@ static int get_value(struct parse_opt_ctx_t *p, return opterror(opt, "is not usable", flags); if (opt->flags & PARSE_OPT_EXCLUSIVE) { - if (p->excl_opt) { + if (p->excl_opt && p->excl_opt != opt) { char msg[128]; if (((flags & OPT_SHORT) && p->excl_opt->short_name) || diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 5c9c4947cfb4..48411674da0f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -551,31 +551,68 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, } /* + * Term is a string term, and might be a param-term. Try to look up it's value + * in the remaining terms. + * - We have a term like "base-or-format-term=param-term", + * - We need to find the value supplied for "param-term" (with param-term named + * in a config string) later on in the term list. + */ +static int pmu_resolve_param_term(struct parse_events_term *term, + struct list_head *head_terms, + __u64 *value) +{ + struct parse_events_term *t; + + list_for_each_entry(t, head_terms, list) { + if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { + if (!strcmp(t->config, term->config)) { + t->used = true; + *value = t->val.num; + return 0; + } + } + } + + if (verbose) + printf("Required parameter '%s' not specified\n", term->config); + + return -1; +} + +/* * Setup one of config[12] attr members based on the * user input data - term parameter. */ static int pmu_config_term(struct list_head *formats, struct perf_event_attr *attr, struct parse_events_term *term, + struct list_head *head_terms, bool zero) { struct perf_pmu_format *format; __u64 *vp; + __u64 val; + + /* + * If this is a parameter we've already used for parameterized-eval, + * skip it in normal eval. + */ + if (term->used) + return 0; /* - * Support only for hardcoded and numnerial terms. * Hardcoded terms should be already in, so nothing * to be done for them. */ if (parse_events__is_hardcoded_term(term)) return 0; - if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) - return -EINVAL; - format = pmu_find_format(formats, term->config); - if (!format) + if (!format) { + if (verbose) + printf("Invalid event/parameter '%s'\n", term->config); return -EINVAL; + } switch (format->value) { case PERF_PMU_FORMAT_VALUE_CONFIG: @@ -592,11 +629,25 @@ static int pmu_config_term(struct list_head *formats, } /* - * XXX If we ever decide to go with string values for - * non-hardcoded terms, here's the place to translate - * them into value. + * Either directly use a numeric term, or try to translate string terms + * using event parameters. */ - pmu_format_value(format->bits, term->val.num, vp, zero); + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) + val = term->val.num; + else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) { + if (strcmp(term->val.str, "?")) { + if (verbose) + pr_info("Invalid sysfs entry %s=%s\n", + term->config, term->val.str); + return -EINVAL; + } + + if (pmu_resolve_param_term(term, head_terms, &val)) + return -EINVAL; + } else + return -EINVAL; + + pmu_format_value(format->bits, val, vp, zero); return 0; } @@ -607,9 +658,10 @@ int perf_pmu__config_terms(struct list_head *formats, { struct parse_events_term *term; - list_for_each_entry(term, head_terms, list) - if (pmu_config_term(formats, attr, term, zero)) + list_for_each_entry(term, head_terms, list) { + if (pmu_config_term(formats, attr, term, head_terms, zero)) return -EINVAL; + } return 0; } @@ -767,10 +819,36 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to) set_bit(b, bits); } +static int sub_non_neg(int a, int b) +{ + if (b > a) + return 0; + return a - b; +} + static char *format_alias(char *buf, int len, struct perf_pmu *pmu, struct perf_pmu_alias *alias) { - snprintf(buf, len, "%s/%s/", pmu->name, alias->name); + struct parse_events_term *term; + int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name); + + list_for_each_entry(term, &alias->terms, list) { + if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) + used += snprintf(buf + used, sub_non_neg(len, used), + ",%s=%s", term->config, + term->val.str); + } + + if (sub_non_neg(len, used) > 0) { + buf[used] = '/'; + used++; + } + if (sub_non_neg(len, used) > 0) { + buf[used] = '\0'; + used++; + } else + buf[len - 1] = '\0'; + return buf; } diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 3dda85ca50c1..d906d0ad5d40 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -768,7 +768,7 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, Py_DECREF(file); goto free_list; } - + Py_DECREF(file); } diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index d808a328f4dc..0c815a40a6e8 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -89,7 +89,7 @@ static void handler_call_die(const char *handler_name) /* * Insert val into into the dictionary and decrement the reference counter. - * This is necessary for dictionaries since PyDict_SetItemString() does not + * This is necessary for dictionaries since PyDict_SetItemString() does not * steal a reference, as opposed to PyTuple_SetItem(). */ static void pydict_set_item_string_decref(PyObject *dict, const char *key, PyObject *val) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5f0e05a76c05..0baf75f12b7c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -274,7 +274,7 @@ void perf_tool__fill_defaults(struct perf_tool *tool) if (tool->id_index == NULL) tool->id_index = process_id_index_stub; } - + static void swap_sample_id_all(union perf_event *event, void *data) { void *end = (void *) event + event->header.size; @@ -1251,9 +1251,9 @@ fetch_mmaped_event(struct perf_session *session, #define NUM_MMAPS 128 #endif -int __perf_session__process_events(struct perf_session *session, - u64 data_offset, u64 data_size, - u64 file_size, struct perf_tool *tool) +static int __perf_session__process_events(struct perf_session *session, + u64 data_offset, u64 data_size, + u64 file_size, struct perf_tool *tool) { int fd = perf_data_file__fd(session->file); u64 head, page_offset, file_offset, file_pos, size; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index dc26ebf60fe4..6d663dc76404 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -49,9 +49,6 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, union perf_event **event_ptr, struct perf_sample *sample); -int __perf_session__process_events(struct perf_session *session, - u64 data_offset, u64 data_size, u64 size, - struct perf_tool *tool); int perf_session__process_events(struct perf_session *session, struct perf_tool *tool); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 9139dda9f9a3..7a39c1ed8d37 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1304,6 +1304,37 @@ static int __sort__hpp_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return hse->se->se_snprintf(he, hpp->buf, hpp->size, len); } +static int64_t __sort__hpp_cmp(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct hpp_sort_entry *hse; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + return hse->se->se_cmp(a, b); +} + +static int64_t __sort__hpp_collapse(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct hpp_sort_entry *hse; + int64_t (*collapse_fn)(struct hist_entry *, struct hist_entry *); + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + collapse_fn = hse->se->se_collapse ?: hse->se->se_cmp; + return collapse_fn(a, b); +} + +static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct hpp_sort_entry *hse; + int64_t (*sort_fn)(struct hist_entry *, struct hist_entry *); + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + sort_fn = hse->se->se_sort ?: hse->se->se_cmp; + return sort_fn(a, b); +} + static struct hpp_sort_entry * __sort_dimension__alloc_hpp(struct sort_dimension *sd) { @@ -1322,9 +1353,9 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) hse->hpp.entry = __sort__hpp_entry; hse->hpp.color = NULL; - hse->hpp.cmp = sd->entry->se_cmp; - hse->hpp.collapse = sd->entry->se_collapse ? : sd->entry->se_cmp; - hse->hpp.sort = sd->entry->se_sort ? : hse->hpp.collapse; + hse->hpp.cmp = __sort__hpp_cmp; + hse->hpp.collapse = __sort__hpp_collapse; + hse->hpp.sort = __sort__hpp_sort; INIT_LIST_HEAD(&hse->hpp.list); INIT_LIST_HEAD(&hse->hpp.sort_list); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 06fcd1bf98b6..b24f9d8727a8 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -574,13 +574,16 @@ static int decompress_kmodule(struct dso *dso, const char *name, const char *ext = strrchr(name, '.'); char tmpbuf[] = "/tmp/perf-kmod-XXXXXX"; - if ((type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP && - type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP) || - type != dso->symtab_type) + if (type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP && + type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP && + type != DSO_BINARY_TYPE__BUILD_ID_CACHE) return -1; - if (!ext || !is_supported_compression(ext + 1)) - return -1; + if (!ext || !is_supported_compression(ext + 1)) { + ext = strrchr(dso->name, '.'); + if (!ext || !is_supported_compression(ext + 1)) + return -1; + } fd = mkstemp(tmpbuf); if (fd < 0) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a194702a0a2f..a69066865a55 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -685,7 +685,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, struct machine *machine = kmaps->machine; struct map *curr_map = map; struct symbol *pos; - int count = 0, moved = 0; + int count = 0, moved = 0; struct rb_root *root = &dso->symbols[map->type]; struct rb_node *next = rb_first(root); int kernel_range = 0; diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 6edf535f65c2..e3c40a520a25 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -266,14 +266,17 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, u64 *fde_count) { int ret = -EINVAL, fd; - u64 offset; + u64 offset = dso->data.frame_offset; - fd = dso__data_fd(dso, machine); - if (fd < 0) - return -EINVAL; + if (offset == 0) { + fd = dso__data_fd(dso, machine); + if (fd < 0) + return -EINVAL; - /* Check the .eh_frame section for unwinding info */ - offset = elf_section_offset(fd, ".eh_frame_hdr"); + /* Check the .eh_frame section for unwinding info */ + offset = elf_section_offset(fd, ".eh_frame_hdr"); + dso->data.frame_offset = offset; + } if (offset) ret = unwind_spec_ehframe(dso, machine, offset, @@ -287,14 +290,20 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, static int read_unwind_spec_debug_frame(struct dso *dso, struct machine *machine, u64 *offset) { - int fd = dso__data_fd(dso, machine); + int fd; + u64 ofs = dso->data.frame_offset; - if (fd < 0) - return -EINVAL; + if (ofs == 0) { + fd = dso__data_fd(dso, machine); + if (fd < 0) + return -EINVAL; - /* Check the .debug_frame section for unwinding info */ - *offset = elf_section_offset(fd, ".debug_frame"); + /* Check the .debug_frame section for unwinding info */ + ofs = elf_section_offset(fd, ".debug_frame"); + dso->data.frame_offset = ofs; + } + *offset = ofs; if (*offset) return 0; diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh index abe14b7f36e9..bb99cde3f5f9 100755 --- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh +++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh @@ -24,7 +24,7 @@ ncpus=`grep '^processor' /proc/cpuinfo | wc -l` idlecpus=`mpstat | tail -1 | \ - awk -v ncpus=$ncpus '{ print ncpus * ($7 + $12) / 100 }'` + awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'` awk -v ncpus=$ncpus -v idlecpus=$idlecpus < /dev/null ' BEGIN { cpus2use = idlecpus; diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index d6cc07fc137f..559e01ac86be 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -30,6 +30,7 @@ else echo Unreadable results directory: $i exit 1 fi +. tools/testing/selftests/rcutorture/bin/functions.sh configfile=`echo $i | sed -e 's/^.*\///'` ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` @@ -48,4 +49,21 @@ else title="$title ($ngpsps per second)" fi echo $title + nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'` + if test -z "$nclosecalls" + then + exit 0 + fi + if test "$nclosecalls" -eq 0 + then + exit 0 + fi + # Compute number of close calls per tenth of an hour + nclosecalls10=`awk -v nclosecalls=$nclosecalls -v dur=$dur 'BEGIN { print int(nclosecalls * 36000 / dur) }' < /dev/null` + if test $nclosecalls10 -gt 5 -a $nclosecalls -gt 1 + then + print_bug $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i + else + print_warning $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i + fi fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 8ca9f21f2efc..5236e073919d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -8,9 +8,9 @@ # # Usage: kvm-test-1-run.sh config builddir resdir minutes qemu-args boot_args # -# qemu-args defaults to "-nographic", along with arguments specifying the -# number of CPUs and other options generated from -# the underlying CPU architecture. +# qemu-args defaults to "-enable-kvm -soundhw pcspk -nographic", along with +# arguments specifying the number of CPUs and other +# options generated from the underlying CPU architecture. # boot_args defaults to value returned by the per_version_boot_params # shell function. # @@ -138,7 +138,7 @@ then fi # Generate -smp qemu argument. -qemu_args="-nographic $qemu_args" +qemu_args="-enable-kvm -soundhw pcspk -nographic $qemu_args" cpu_count=`configNR_CPUS.sh $config_template` cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"` vcpus=`identify_qemu_vcpus` @@ -168,6 +168,7 @@ then touch $resdir/buildonly exit 0 fi +echo "NOTE: $QEMU either did not run or was interactive" > $builddir/console.log echo $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd ( $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append "$qemu_append $boot_args"; echo $? > $resdir/qemu-retval ) & qemu_pid=$! diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh index 499d1e598e42..a6b57622c2e5 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-build.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh @@ -26,12 +26,15 @@ # # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> -T=$1 +F=$1 title=$2 +T=/tmp/parse-build.sh.$$ +trap 'rm -rf $T' 0 +mkdir $T . functions.sh -if grep -q CC < $T +if grep -q CC < $F then : else @@ -39,18 +42,21 @@ else exit 1 fi -if grep -q "error:" < $T +if grep -q "error:" < $F then print_bug $title build errors: - grep "error:" < $T + grep "error:" < $F exit 2 fi -exit 0 -if egrep -q "rcu[^/]*\.c.*warning:|rcu.*\.h.*warning:" < $T +grep warning: < $F > $T/warnings +grep "include/linux/*rcu*\.h:" $T/warnings > $T/hwarnings +grep "kernel/rcu/[^/]*:" $T/warnings > $T/cwarnings +cat $T/hwarnings $T/cwarnings > $T/rcuwarnings +if test -s $T/rcuwarnings then print_warning $title build errors: - egrep "rcu[^/]*\.c.*warning:|rcu.*\.h.*warning:" < $T + cat $T/rcuwarnings exit 2 fi exit 0 diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index f962ba4cf68b..d8f35cf116be 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -36,7 +36,7 @@ if grep -Pq '\x00' < $file then print_warning Console output contains nul bytes, old qemu still running? fi -egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $T +egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|Stall ended before state dump start' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $T if test -s $T then print_warning Assertion failure in $file $title |