diff options
310 files changed, 7132 insertions, 3840 deletions
diff --git a/Documentation/devicetree/bindings/usb/atmel-usb.txt b/Documentation/devicetree/bindings/usb/atmel-usb.txt index 1be8d7a26c15..5883b73ea1b5 100644 --- a/Documentation/devicetree/bindings/usb/atmel-usb.txt +++ b/Documentation/devicetree/bindings/usb/atmel-usb.txt @@ -79,9 +79,9 @@ Atmel High-Speed USB device controller Required properties: - compatible: Should be one of the following - "at91sam9rl-udc" - "at91sam9g45-udc" - "sama5d3-udc" + "atmel,at91sam9rl-udc" + "atmel,at91sam9g45-udc" + "atmel,sama5d3-udc" - reg: Address and length of the register set for the device - interrupts: Should contain usba interrupt - clocks: Should reference the peripheral and host clocks diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 51f4221657bf..611c52267d24 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -321,6 +321,7 @@ Code Seq#(hex) Include File Comments 0xDB 00-0F drivers/char/mwave/mwavepub.h 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ <mailto:aherrman@de.ibm.com> +0xE5 00-3F linux/fuse.h 0xEC 00-01 drivers/platform/chrome/cros_ec_dev.h ChromeOS EC driver 0xF3 00-3F drivers/usb/misc/sisusbvga/sisusb.h sisfb (in development) <mailto:thomas@winischhofer.net> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index afe7e2bbbc23..1d6f0459cd7b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -293,6 +293,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS Format: To spoof as Windows 98: ="Microsoft Windows" + acpi_rev_override [ACPI] Override the _REV object to return 5 (instead + of 2 which is mandated by ACPI 6) as the supported ACPI + specification revision (when using this switch, it may + be necessary to carry out a cold reboot _twice_ in a + row to make it take effect on the platform firmware). + acpi_osi= [HW,ACPI] Modify list of supported OS interface strings acpi_osi="string1" # add string1 acpi_osi="!string2" # remove string2 diff --git a/MAINTAINERS b/MAINTAINERS index 993d4cfd5aa0..86ea2084bc58 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4430,9 +4430,11 @@ FUSE: FILESYSTEM IN USERSPACE M: Miklos Szeredi <miklos@szeredi.hu> L: fuse-devel@lists.sourceforge.net W: http://fuse.sourceforge.net/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git S: Maintained F: fs/fuse/ F: include/uapi/linux/fuse.h +F: Documentation/filesystems/fuse.txt FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit) M: Rik Faith <faith@cs.unc.edu> @@ -335,15 +335,6 @@ endif export KBUILD_MODULES KBUILD_BUILTIN export KBUILD_CHECKSRC KBUILD_SRC KBUILD_EXTMOD -ifneq ($(CC),) -ifeq ($(shell $(CC) -v 2>&1 | grep -c "clang version"), 1) -COMPILER := clang -else -COMPILER := gcc -endif -export COMPILER -endif - # We need some generic definitions (do not try to remake the file). scripts/Kbuild.include: ; include scripts/Kbuild.include @@ -670,6 +661,13 @@ endif endif KBUILD_CFLAGS += $(stackp-flag) +ifeq ($(shell $(CC) -v 2>&1 | grep -c "clang version"), 1) +COMPILER := clang +else +COMPILER := gcc +endif +export COMPILER + ifeq ($(COMPILER),clang) KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) KBUILD_CPPFLAGS += $(call cc-option,-Wno-unknown-warning-option,) diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi index 0854d4493da7..3de9b761cc1a 100644 --- a/arch/arm/boot/dts/armada-xp.dtsi +++ b/arch/arm/boot/dts/armada-xp.dtsi @@ -305,7 +305,7 @@ spi0_pins: spi0-pins { marvell,pins = "mpp36", "mpp37", "mpp38", "mpp39"; - marvell,function = "spi"; + marvell,function = "spi0"; }; uart2_pins: uart2-pins { diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index d260ba779ae5..18177f5a7464 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -1148,7 +1148,7 @@ usb2: gadget@fff78000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,at91sam9g45-udc"; reg = <0x00600000 0x80000 0xfff78000 0x400>; interrupts = <27 IRQ_TYPE_LEVEL_HIGH 0>; diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 7521bdf17ef2..b6c8df8d380e 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -1108,7 +1108,7 @@ usb2: gadget@f803c000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,at91sam9g45-udc"; reg = <0x00500000 0x80000 0xf803c000 0x400>; interrupts = <23 IRQ_TYPE_LEVEL_HIGH 0>; diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi index 5ab7548e04e1..9e2444b07bce 100644 --- a/arch/arm/boot/dts/sama5d3.dtsi +++ b/arch/arm/boot/dts/sama5d3.dtsi @@ -1321,7 +1321,7 @@ usb0: gadget@00500000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,sama5d3-udc"; reg = <0x00500000 0x100000 0xf8030000 0x4000>; interrupts = <33 IRQ_TYPE_LEVEL_HIGH 2>; diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index 653a1f851f2b..3ee22ee13c5a 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -127,7 +127,7 @@ usb0: gadget@00400000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,sama5d3-udc"; reg = <0x00400000 0x100000 0xfc02c000 0x4000>; interrupts = <47 IRQ_TYPE_LEVEL_HIGH 2>; diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index fd6a6d23bc20..6d83a1bf0c74 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -169,6 +169,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_ATMEL=y +CONFIG_MTD_NAND_BRCMNAND=y CONFIG_MTD_NAND_DAVINCI=y CONFIG_MTD_SPI_NOR=y CONFIG_MTD_UBI=y diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index f8f7398c74c2..7dac3086e361 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -15,6 +15,8 @@ * that causes it to save wrong values... Be aware! */ +#include <linux/init.h> + #include <asm/assembler.h> #include <asm/memory.h> #include <asm/glue-df.h> diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index e9184feffc4e..0ac9e4b3b265 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -19,7 +19,6 @@ config ARCH_BCM_IPROC select ARCH_REQUIRE_GPIOLIB select ARM_AMBA select PINCTRL - select MTD_NAND_BRCMNAND help This enables support for systems based on Broadcom IPROC architected SoCs. The IPROC complex contains one or more ARM CPUs along with common diff --git a/arch/arm/mach-dove/include/mach/irqs.h b/arch/arm/mach-dove/include/mach/irqs.h index 03d401d20453..3f29e6bca058 100644 --- a/arch/arm/mach-dove/include/mach/irqs.h +++ b/arch/arm/mach-dove/include/mach/irqs.h @@ -14,73 +14,73 @@ /* * Dove Low Interrupt Controller */ -#define IRQ_DOVE_BRIDGE 0 -#define IRQ_DOVE_H2C 1 -#define IRQ_DOVE_C2H 2 -#define IRQ_DOVE_NAND 3 -#define IRQ_DOVE_PDMA 4 -#define IRQ_DOVE_SPI1 5 -#define IRQ_DOVE_SPI0 6 -#define IRQ_DOVE_UART_0 7 -#define IRQ_DOVE_UART_1 8 -#define IRQ_DOVE_UART_2 9 -#define IRQ_DOVE_UART_3 10 -#define IRQ_DOVE_I2C 11 -#define IRQ_DOVE_GPIO_0_7 12 -#define IRQ_DOVE_GPIO_8_15 13 -#define IRQ_DOVE_GPIO_16_23 14 -#define IRQ_DOVE_PCIE0_ERR 15 -#define IRQ_DOVE_PCIE0 16 -#define IRQ_DOVE_PCIE1_ERR 17 -#define IRQ_DOVE_PCIE1 18 -#define IRQ_DOVE_I2S0 19 -#define IRQ_DOVE_I2S0_ERR 20 -#define IRQ_DOVE_I2S1 21 -#define IRQ_DOVE_I2S1_ERR 22 -#define IRQ_DOVE_USB_ERR 23 -#define IRQ_DOVE_USB0 24 -#define IRQ_DOVE_USB1 25 -#define IRQ_DOVE_GE00_RX 26 -#define IRQ_DOVE_GE00_TX 27 -#define IRQ_DOVE_GE00_MISC 28 -#define IRQ_DOVE_GE00_SUM 29 -#define IRQ_DOVE_GE00_ERR 30 -#define IRQ_DOVE_CRYPTO 31 +#define IRQ_DOVE_BRIDGE (1 + 0) +#define IRQ_DOVE_H2C (1 + 1) +#define IRQ_DOVE_C2H (1 + 2) +#define IRQ_DOVE_NAND (1 + 3) +#define IRQ_DOVE_PDMA (1 + 4) +#define IRQ_DOVE_SPI1 (1 + 5) +#define IRQ_DOVE_SPI0 (1 + 6) +#define IRQ_DOVE_UART_0 (1 + 7) +#define IRQ_DOVE_UART_1 (1 + 8) +#define IRQ_DOVE_UART_2 (1 + 9) +#define IRQ_DOVE_UART_3 (1 + 10) +#define IRQ_DOVE_I2C (1 + 11) +#define IRQ_DOVE_GPIO_0_7 (1 + 12) +#define IRQ_DOVE_GPIO_8_15 (1 + 13) +#define IRQ_DOVE_GPIO_16_23 (1 + 14) +#define IRQ_DOVE_PCIE0_ERR (1 + 15) +#define IRQ_DOVE_PCIE0 (1 + 16) +#define IRQ_DOVE_PCIE1_ERR (1 + 17) +#define IRQ_DOVE_PCIE1 (1 + 18) +#define IRQ_DOVE_I2S0 (1 + 19) +#define IRQ_DOVE_I2S0_ERR (1 + 20) +#define IRQ_DOVE_I2S1 (1 + 21) +#define IRQ_DOVE_I2S1_ERR (1 + 22) +#define IRQ_DOVE_USB_ERR (1 + 23) +#define IRQ_DOVE_USB0 (1 + 24) +#define IRQ_DOVE_USB1 (1 + 25) +#define IRQ_DOVE_GE00_RX (1 + 26) +#define IRQ_DOVE_GE00_TX (1 + 27) +#define IRQ_DOVE_GE00_MISC (1 + 28) +#define IRQ_DOVE_GE00_SUM (1 + 29) +#define IRQ_DOVE_GE00_ERR (1 + 30) +#define IRQ_DOVE_CRYPTO (1 + 31) /* * Dove High Interrupt Controller */ -#define IRQ_DOVE_AC97 32 -#define IRQ_DOVE_PMU 33 -#define IRQ_DOVE_CAM 34 -#define IRQ_DOVE_SDIO0 35 -#define IRQ_DOVE_SDIO1 36 -#define IRQ_DOVE_SDIO0_WAKEUP 37 -#define IRQ_DOVE_SDIO1_WAKEUP 38 -#define IRQ_DOVE_XOR_00 39 -#define IRQ_DOVE_XOR_01 40 -#define IRQ_DOVE_XOR0_ERR 41 -#define IRQ_DOVE_XOR_10 42 -#define IRQ_DOVE_XOR_11 43 -#define IRQ_DOVE_XOR1_ERR 44 -#define IRQ_DOVE_LCD_DCON 45 -#define IRQ_DOVE_LCD1 46 -#define IRQ_DOVE_LCD0 47 -#define IRQ_DOVE_GPU 48 -#define IRQ_DOVE_PERFORM_MNTR 49 -#define IRQ_DOVE_VPRO_DMA1 51 -#define IRQ_DOVE_SSP_TIMER 54 -#define IRQ_DOVE_SSP 55 -#define IRQ_DOVE_MC_L2_ERR 56 -#define IRQ_DOVE_CRYPTO_ERR 59 -#define IRQ_DOVE_GPIO_24_31 60 -#define IRQ_DOVE_HIGH_GPIO 61 -#define IRQ_DOVE_SATA 62 +#define IRQ_DOVE_AC97 (1 + 32) +#define IRQ_DOVE_PMU (1 + 33) +#define IRQ_DOVE_CAM (1 + 34) +#define IRQ_DOVE_SDIO0 (1 + 35) +#define IRQ_DOVE_SDIO1 (1 + 36) +#define IRQ_DOVE_SDIO0_WAKEUP (1 + 37) +#define IRQ_DOVE_SDIO1_WAKEUP (1 + 38) +#define IRQ_DOVE_XOR_00 (1 + 39) +#define IRQ_DOVE_XOR_01 (1 + 40) +#define IRQ_DOVE_XOR0_ERR (1 + 41) +#define IRQ_DOVE_XOR_10 (1 + 42) +#define IRQ_DOVE_XOR_11 (1 + 43) +#define IRQ_DOVE_XOR1_ERR (1 + 44) +#define IRQ_DOVE_LCD_DCON (1 + 45) +#define IRQ_DOVE_LCD1 (1 + 46) +#define IRQ_DOVE_LCD0 (1 + 47) +#define IRQ_DOVE_GPU (1 + 48) +#define IRQ_DOVE_PERFORM_MNTR (1 + 49) +#define IRQ_DOVE_VPRO_DMA1 (1 + 51) +#define IRQ_DOVE_SSP_TIMER (1 + 54) +#define IRQ_DOVE_SSP (1 + 55) +#define IRQ_DOVE_MC_L2_ERR (1 + 56) +#define IRQ_DOVE_CRYPTO_ERR (1 + 59) +#define IRQ_DOVE_GPIO_24_31 (1 + 60) +#define IRQ_DOVE_HIGH_GPIO (1 + 61) +#define IRQ_DOVE_SATA (1 + 62) /* * DOVE General Purpose Pins */ -#define IRQ_DOVE_GPIO_START 64 +#define IRQ_DOVE_GPIO_START 65 #define NR_GPIO_IRQS 64 /* diff --git a/arch/arm/mach-dove/irq.c b/arch/arm/mach-dove/irq.c index 4a5a7aedcb76..df0223f76fa9 100644 --- a/arch/arm/mach-dove/irq.c +++ b/arch/arm/mach-dove/irq.c @@ -126,14 +126,14 @@ __exception_irq_entry dove_legacy_handle_irq(struct pt_regs *regs) stat = readl_relaxed(dove_irq_base + IRQ_CAUSE_LOW_OFF); stat &= readl_relaxed(dove_irq_base + IRQ_MASK_LOW_OFF); if (stat) { - unsigned int hwirq = __fls(stat); + unsigned int hwirq = 1 + __fls(stat); handle_IRQ(hwirq, regs); return; } stat = readl_relaxed(dove_irq_base + IRQ_CAUSE_HIGH_OFF); stat &= readl_relaxed(dove_irq_base + IRQ_MASK_HIGH_OFF); if (stat) { - unsigned int hwirq = 32 + __fls(stat); + unsigned int hwirq = 33 + __fls(stat); handle_IRQ(hwirq, regs); return; } @@ -144,8 +144,8 @@ void __init dove_init_irq(void) { int i; - orion_irq_init(0, IRQ_VIRT_BASE + IRQ_MASK_LOW_OFF); - orion_irq_init(32, IRQ_VIRT_BASE + IRQ_MASK_HIGH_OFF); + orion_irq_init(1, IRQ_VIRT_BASE + IRQ_MASK_LOW_OFF); + orion_irq_init(33, IRQ_VIRT_BASE + IRQ_MASK_HIGH_OFF); #ifdef CONFIG_MULTI_IRQ_HANDLER set_handle_irq(dove_legacy_handle_irq); diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S index 48e4c4b3cd1c..b093a196e801 100644 --- a/arch/arm/mach-mvebu/headsmp-a9.S +++ b/arch/arm/mach-mvebu/headsmp-a9.S @@ -13,12 +13,9 @@ */ #include <linux/linkage.h> -#include <linux/init.h> #include <asm/assembler.h> - __CPUINIT - ENTRY(mvebu_cortex_a9_secondary_startup) ARM_BE8(setend be) bl armada_38x_scu_power_up diff --git a/arch/arm/mach-mvebu/platsmp-a9.c b/arch/arm/mach-mvebu/platsmp-a9.c index df0a9cc5da59..3d5000481c11 100644 --- a/arch/arm/mach-mvebu/platsmp-a9.c +++ b/arch/arm/mach-mvebu/platsmp-a9.c @@ -24,7 +24,7 @@ extern void mvebu_cortex_a9_secondary_startup(void); -static int __cpuinit mvebu_cortex_a9_boot_secondary(unsigned int cpu, +static int mvebu_cortex_a9_boot_secondary(unsigned int cpu, struct task_struct *idle) { int ret, hw_cpu; diff --git a/arch/arm/mach-mvebu/pm-board.c b/arch/arm/mach-mvebu/pm-board.c index 6dfd4ab97b2a..301ab38d38ba 100644 --- a/arch/arm/mach-mvebu/pm-board.c +++ b/arch/arm/mach-mvebu/pm-board.c @@ -43,6 +43,9 @@ static void mvebu_armada_xp_gp_pm_enter(void __iomem *sdram_reg, u32 srcmd) for (i = 0; i < ARMADA_XP_GP_PIC_NR_GPIOS; i++) ackcmd |= BIT(pic_raw_gpios[i]); + srcmd = cpu_to_le32(srcmd); + ackcmd = cpu_to_le32(ackcmd); + /* * Wait a while, the PIC needs quite a bit of time between the * two GPIO commands. diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c index 2e6ab67e2284..8fcec1cc101e 100644 --- a/arch/arm/mach-rockchip/platsmp.c +++ b/arch/arm/mach-rockchip/platsmp.c @@ -119,8 +119,7 @@ static int pmu_set_power_domain(int pd, bool on) * Handling of CPU cores */ -static int __cpuinit rockchip_boot_secondary(unsigned int cpu, - struct task_struct *idle) +static int rockchip_boot_secondary(unsigned int cpu, struct task_struct *idle) { int ret; diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index f61158c6ce71..5766ce2be32b 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -589,4 +589,4 @@ static int __init ve_spc_clk_init(void) platform_device_register_simple("vexpress-spc-cpufreq", -1, NULL, 0); return 0; } -module_init(ve_spc_clk_init); +device_initcall(ve_spc_clk_init); diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index 0bb287ca0a98..0689c3fb56e3 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -717,6 +717,19 @@ phy-names = "sata-phy"; }; + sbgpio: sbgpio@17001000{ + compatible = "apm,xgene-gpio-sb"; + reg = <0x0 0x17001000 0x0 0x400>; + #gpio-cells = <2>; + gpio-controller; + interrupts = <0x0 0x28 0x1>, + <0x0 0x29 0x1>, + <0x0 0x2a 0x1>, + <0x0 0x2b 0x1>, + <0x0 0x2c 0x1>, + <0x0 0x2d 0x1>; + }; + rtc: rtc@10510000 { compatible = "apm,xgene-rtc"; reg = <0x0 0x10510000 0x0 0x400>; diff --git a/arch/cris/arch-v10/drivers/eeprom.c b/arch/cris/arch-v10/drivers/eeprom.c index 5047a33043bd..f679a19dfeb8 100644 --- a/arch/cris/arch-v10/drivers/eeprom.c +++ b/arch/cris/arch-v10/drivers/eeprom.c @@ -848,5 +848,4 @@ static void eeprom_disable_write_protect(void) /* Write protect disabled */ } } - -module_init(eeprom_init); +device_initcall(eeprom_init); diff --git a/arch/cris/arch-v32/mm/intmem.c b/arch/cris/arch-v32/mm/intmem.c index 1b17d92cef8e..9ef56092a4c5 100644 --- a/arch/cris/arch-v32/mm/intmem.c +++ b/arch/cris/arch-v32/mm/intmem.c @@ -145,6 +145,5 @@ unsigned long crisv32_intmem_virt_to_phys(void* addr) (unsigned long)intmem_virtual + MEM_INTMEM_START + RESERVED_SIZE); } - -module_init(crisv32_intmem_init); +device_initcall(crisv32_intmem_init); diff --git a/arch/frv/mb93090-mb00/flash.c b/arch/frv/mb93090-mb00/flash.c index c0e3707c2299..e1cf802d1639 100644 --- a/arch/frv/mb93090-mb00/flash.c +++ b/arch/frv/mb93090-mb00/flash.c @@ -9,7 +9,7 @@ * 2 of the Licence, or (at your option) any later version. */ -#include <linux/init.h> +#include <linux/module.h> #include <linux/platform_device.h> #include <linux/mtd/partitions.h> #include <linux/mtd/physmap.h> diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c index 3a428f19a001..085047f3a545 100644 --- a/arch/ia64/hp/sim/simscsi.c +++ b/arch/ia64/hp/sim/simscsi.c @@ -368,13 +368,4 @@ simscsi_init(void) scsi_host_put(host); return error; } - -static void __exit -simscsi_exit(void) -{ - scsi_remove_host(host); - scsi_host_put(host); -} - -module_init(simscsi_init); -module_exit(simscsi_exit); +device_initcall(simscsi_init); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 7f3028965064..97e48b0eefc7 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -215,10 +215,6 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot) pmd_t *pmd; pte_t *pte; - if (!PageReserved(page)) - printk(KERN_ERR "put_kernel_page: page at 0x%p not in reserved memory\n", - page_address(page)); - pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */ { diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c index 27793f7aa99c..5b799d4deb74 100644 --- a/arch/ia64/sn/kernel/mca.c +++ b/arch/ia64/sn/kernel/mca.c @@ -142,5 +142,4 @@ static int __init sn_salinfo_init(void) salinfo_platform_oemdata = &sn_salinfo_platform_oemdata; return 0; } - -module_init(sn_salinfo_init) +device_initcall(sn_salinfo_init); diff --git a/arch/mn10300/unit-asb2303/flash.c b/arch/mn10300/unit-asb2303/flash.c index 17fe083fcb6f..b03d8738d67c 100644 --- a/arch/mn10300/unit-asb2303/flash.c +++ b/arch/mn10300/unit-asb2303/flash.c @@ -96,5 +96,4 @@ static int __init asb2303_mtd_init(void) platform_device_register(&asb2303_sysflash); return 0; } - -module_init(asb2303_mtd_init); +device_initcall(asb2303_mtd_init); diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c index d5cae55195ec..10a5ae9553fd 100644 --- a/arch/parisc/kernel/pdc_cons.c +++ b/arch/parisc/kernel/pdc_cons.c @@ -207,8 +207,7 @@ static int __init pdc_console_tty_driver_init(void) return 0; } - -module_init(pdc_console_tty_driver_init); +device_initcall(pdc_console_tty_driver_init); static struct tty_driver * pdc_console_device (struct console *c, int *index) { diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index ba0c053e25ae..518f4f5f1f43 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -543,6 +543,7 @@ static int __init perf_init(void) return 0; } +device_initcall(perf_init); /* * perf_start_counters(void) @@ -847,5 +848,3 @@ printk("perf_rdr_write\n"); } printk("perf_rdr_write done\n"); } - -module_init(perf_init); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 56f44848b044..43922509a483 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -1124,4 +1124,4 @@ static int __init rtc_init(void) return PTR_ERR_OR_ZERO(pdev); } -module_init(rtc_init); +device_initcall(rtc_init); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 1f614d778a8b..bb0bd7025cb8 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -928,7 +928,7 @@ static int __init hugetlbpage_init(void) return 0; } #endif -module_init(hugetlbpage_init); +arch_initcall(hugetlbpage_init); void flush_dcache_icache_hugepage(struct page *page) { diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index c9adbfb65006..fcbea4b51a78 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -445,5 +445,4 @@ static int pmc_init(void) { return platform_driver_register(&pmc_driver); } - -module_init(pmc_init); +device_initcall(pmc_init); diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c index ce73ce865613..791c6142c4a7 100644 --- a/arch/powerpc/platforms/ps3/time.c +++ b/arch/powerpc/platforms/ps3/time.c @@ -92,5 +92,4 @@ static int __init ps3_rtc_init(void) return PTR_ERR_OR_ZERO(pdev); } - -module_init(ps3_rtc_init); +device_initcall(ps3_rtc_init); diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c index d631022ffb4b..38138cf8d33e 100644 --- a/arch/powerpc/sysdev/fsl_lbc.c +++ b/arch/powerpc/sysdev/fsl_lbc.c @@ -407,4 +407,4 @@ static int __init fsl_lbc_init(void) { return platform_driver_register(&fsl_lbc_ctrl_driver); } -module_init(fsl_lbc_init); +subsys_initcall(fsl_lbc_init); diff --git a/arch/sh/boards/mach-highlander/psw.c b/arch/sh/boards/mach-highlander/psw.c index 522786318d36..40e2b585d488 100644 --- a/arch/sh/boards/mach-highlander/psw.c +++ b/arch/sh/boards/mach-highlander/psw.c @@ -10,7 +10,7 @@ * for more details. */ #include <linux/io.h> -#include <linux/init.h> +#include <linux/module.h> #include <linux/interrupt.h> #include <linux/platform_device.h> #include <mach/highlander.h> diff --git a/arch/sh/boards/mach-landisk/psw.c b/arch/sh/boards/mach-landisk/psw.c index bef83522f958..5192b1f43ada 100644 --- a/arch/sh/boards/mach-landisk/psw.c +++ b/arch/sh/boards/mach-landisk/psw.c @@ -140,4 +140,4 @@ static int __init psw_init(void) { return platform_add_devices(psw_devices, ARRAY_SIZE(psw_devices)); } -module_init(psw_init); +device_initcall(psw_init); diff --git a/arch/tile/kernel/usb.c b/arch/tile/kernel/usb.c index 5af8debc6a71..f0da5a237e94 100644 --- a/arch/tile/kernel/usb.c +++ b/arch/tile/kernel/usb.c @@ -21,6 +21,7 @@ #include <linux/dma-mapping.h> #include <linux/platform_device.h> #include <linux/usb/tilegx.h> +#include <linux/init.h> #include <linux/types.h> static u64 ehci_dmamask = DMA_BIT_MASK(32); diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c index 5de7f4c56971..52c8e3c7789d 100644 --- a/arch/x86/kernel/bootflag.c +++ b/arch/x86/kernel/bootflag.c @@ -98,4 +98,4 @@ static int __init sbf_init(void) return 0; } -module_init(sbf_init); +arch_initcall(sbf_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c index 7795f3f8b1d5..43dd672d788b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c @@ -530,5 +530,4 @@ static __init int bts_init(void) return perf_pmu_register(&bts_pmu, "intel_bts", -1); } - -module_init(bts_init); +arch_initcall(bts_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c index 159887c3a89d..183de719628d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c @@ -1106,5 +1106,4 @@ static __init int pt_init(void) return ret; } - -module_init(pt_init); +arch_initcall(pt_init); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 5ee771859b6f..1f4acd68b98b 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -65,7 +65,7 @@ static int __init add_bus_probe(void) return of_platform_bus_probe(NULL, ce4100_ids, NULL); } -module_init(add_bus_probe); +device_initcall(add_bus_probe); #ifdef CONFIG_PCI struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index ee22c1d93ae5..b034b1b14b9c 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -72,7 +72,7 @@ asmlinkage __visible void vsmp_irq_enable(void) } PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable); -static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, +static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { switch (type) { diff --git a/arch/x86/platform/intel-mid/intel_mid_vrtc.c b/arch/x86/platform/intel-mid/intel_mid_vrtc.c index 32947ba0f62d..ee40fcb6e54d 100644 --- a/arch/x86/platform/intel-mid/intel_mid_vrtc.c +++ b/arch/x86/platform/intel-mid/intel_mid_vrtc.c @@ -173,5 +173,4 @@ static int __init intel_mid_device_create(void) return platform_device_register(&vrtc_device); } - -module_init(intel_mid_device_create); +device_initcall(intel_mid_device_create); diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 17b1ef3232e4..8ab021b1f141 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -681,6 +681,4 @@ static int iss_net_init(void) return 1; } - -module_init(iss_net_init); - +device_initcall(iss_net_init); diff --git a/crypto/asymmetric_keys/pkcs7_key_type.c b/crypto/asymmetric_keys/pkcs7_key_type.c index 751f8fd7335d..3d13b042da73 100644 --- a/crypto/asymmetric_keys/pkcs7_key_type.c +++ b/crypto/asymmetric_keys/pkcs7_key_type.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) "PKCS7key: "fmt #include <linux/key.h> #include <linux/err.h> +#include <linux/module.h> #include <linux/key-type.h> #include <crypto/pkcs7.h> #include <keys/user-type.h> diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index f15db002be8e..114cf48085ab 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -80,6 +80,26 @@ config ACPI_PROCFS_POWER Say N to delete power /proc/acpi/ directories that have moved to /sys/ +config ACPI_REV_OVERRIDE_POSSIBLE + bool "Allow supported ACPI revision to be overriden" + depends on X86 + default y + help + The platform firmware on some systems expects Linux to return "5" as + the supported ACPI revision which makes it expose system configuration + information in a special way. + + For example, based on what ACPI exports as the supported revision, + Dell XPS 13 (2015) configures its audio device to either work in HDA + mode or in I2S mode, where the former is supposed to be used on Linux + until the latter is fully supported (in the kernel as well as in user + space). + + This option enables a DMI-based quirk for the above Dell machine (so + that HDA audio is exposed by the platform firmware to the kernel) and + makes it possible to force the kernel to return "5" as the supported + ACPI revision via the "acpi_rev_override" command line switch. + config ACPI_EC_DEBUGFS tristate "EC read/write access through /sys/kernel/debug/ec" default n diff --git a/drivers/acpi/acpica/accommon.h b/drivers/acpi/acpica/accommon.h index 853aa2dbdb61..a8d8092ee391 100644 --- a/drivers/acpi/acpica/accommon.h +++ b/drivers/acpi/acpica/accommon.h @@ -59,5 +59,8 @@ #include "acglobal.h" /* All global variables */ #include "achware.h" /* Hardware defines and interfaces */ #include "acutils.h" /* Utility interfaces */ +#ifndef ACPI_USE_SYSTEM_CLIBRARY +#include "acclib.h" /* C library interfaces */ +#endif /* !ACPI_USE_SYSTEM_CLIBRARY */ #endif /* __ACCOMMON_H__ */ diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index a0c478784314..53f96a370762 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -61,6 +61,8 @@ ACPI_GLOBAL(struct acpi_table_header, acpi_gbl_original_dsdt_header); #if (!ACPI_REDUCED_HARDWARE) ACPI_GLOBAL(struct acpi_table_facs *, acpi_gbl_FACS); +ACPI_GLOBAL(struct acpi_table_facs *, acpi_gbl_facs32); +ACPI_GLOBAL(struct acpi_table_facs *, acpi_gbl_facs64); #endif /* !ACPI_REDUCED_HARDWARE */ diff --git a/drivers/acpi/acpica/acinterp.h b/drivers/acpi/acpica/acinterp.h index 1886bde54b5d..7ac98000b46b 100644 --- a/drivers/acpi/acpica/acinterp.h +++ b/drivers/acpi/acpica/acinterp.h @@ -468,6 +468,8 @@ void acpi_ex_eisa_id_to_string(char *dest, u64 compressed_id); void acpi_ex_integer_to_string(char *dest, u64 value); +void acpi_ex_pci_cls_to_string(char *dest, u8 class_code[3]); + u8 acpi_is_valid_space_id(u8 space_id); /* diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index ffdb956391f6..bc600969c6a1 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -213,6 +213,7 @@ struct acpi_table_list { #define ACPI_TABLE_INDEX_DSDT (0) #define ACPI_TABLE_INDEX_FACS (1) +#define ACPI_TABLE_INDEX_X_FACS (2) struct acpi_find_context { char *search_for; diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h index 952fbe0b7231..0dd088290d80 100644 --- a/drivers/acpi/acpica/acnamesp.h +++ b/drivers/acpi/acpica/acnamesp.h @@ -66,6 +66,7 @@ #define ACPI_NS_PREFIX_IS_SCOPE 0x10 #define ACPI_NS_EXTERNAL 0x20 #define ACPI_NS_TEMPORARY 0x40 +#define ACPI_NS_OVERRIDE_IF_FOUND 0x80 /* Flags for acpi_ns_walk_namespace */ diff --git a/drivers/acpi/acpica/acobject.h b/drivers/acpi/acpica/acobject.h index 3e9720e1f34f..c81d98d09cac 100644 --- a/drivers/acpi/acpica/acobject.h +++ b/drivers/acpi/acpica/acobject.h @@ -335,6 +335,7 @@ struct acpi_object_reference { void *object; /* name_op=>HANDLE to obj, index_op=>union acpi_operand_object */ struct acpi_namespace_node *node; /* ref_of or Namepath */ union acpi_operand_object **where; /* Target of Index */ + u8 *index_pointer; /* Used for Buffers and Strings */ u32 value; /* Used for Local/Arg/Index/ddb_handle */ }; diff --git a/drivers/acpi/acpica/acstruct.h b/drivers/acpi/acpica/acstruct.h index 87c7860b3394..44997ca02ae2 100644 --- a/drivers/acpi/acpica/acstruct.h +++ b/drivers/acpi/acpica/acstruct.h @@ -82,6 +82,7 @@ struct acpi_walk_state { u8 return_used; u8 scope_depth; u8 pass_number; /* Parse pass during table load */ + u8 namespace_override; /* Override existing objects */ u8 result_size; /* Total elements for the result stack */ u8 result_count; /* Current number of occupied elements of result stack */ u32 aml_offset; diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h index d49f5c7a20d9..6de0d3573037 100644 --- a/drivers/acpi/acpica/acutils.h +++ b/drivers/acpi/acpica/acutils.h @@ -205,66 +205,6 @@ acpi_status acpi_ut_hardware_initialize(void); void acpi_ut_subsystem_shutdown(void); -/* - * utclib - Local implementations of C library functions - */ -#ifndef ACPI_USE_SYSTEM_CLIBRARY - -acpi_size acpi_ut_strlen(const char *string); - -char *acpi_ut_strchr(const char *string, int ch); - -char *acpi_ut_strcpy(char *dst_string, const char *src_string); - -char *acpi_ut_strncpy(char *dst_string, - const char *src_string, acpi_size count); - -int acpi_ut_memcmp(const char *buffer1, const char *buffer2, acpi_size count); - -int acpi_ut_strncmp(const char *string1, const char *string2, acpi_size count); - -int acpi_ut_strcmp(const char *string1, const char *string2); - -char *acpi_ut_strcat(char *dst_string, const char *src_string); - -char *acpi_ut_strncat(char *dst_string, - const char *src_string, acpi_size count); - -u32 acpi_ut_strtoul(const char *string, char **terminator, u32 base); - -char *acpi_ut_strstr(char *string1, char *string2); - -void *acpi_ut_memcpy(void *dest, const void *src, acpi_size count); - -void *acpi_ut_memset(void *dest, u8 value, acpi_size count); - -int acpi_ut_to_upper(int c); - -int acpi_ut_to_lower(int c); - -extern const u8 _acpi_ctype[]; - -#define _ACPI_XA 0x00 /* extra alphabetic - not supported */ -#define _ACPI_XS 0x40 /* extra space */ -#define _ACPI_BB 0x00 /* BEL, BS, etc. - not supported */ -#define _ACPI_CN 0x20 /* CR, FF, HT, NL, VT */ -#define _ACPI_DI 0x04 /* '0'-'9' */ -#define _ACPI_LO 0x02 /* 'a'-'z' */ -#define _ACPI_PU 0x10 /* punctuation */ -#define _ACPI_SP 0x08 /* space, tab, CR, LF, VT, FF */ -#define _ACPI_UP 0x01 /* 'A'-'Z' */ -#define _ACPI_XD 0x80 /* '0'-'9', 'A'-'F', 'a'-'f' */ - -#define ACPI_IS_DIGIT(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_DI)) -#define ACPI_IS_SPACE(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_SP)) -#define ACPI_IS_XDIGIT(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_XD)) -#define ACPI_IS_UPPER(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_UP)) -#define ACPI_IS_LOWER(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO)) -#define ACPI_IS_PRINT(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO | _ACPI_UP | _ACPI_DI | _ACPI_XS | _ACPI_PU)) -#define ACPI_IS_ALPHA(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO | _ACPI_UP)) - -#endif /* !ACPI_USE_SYSTEM_CLIBRARY */ - #define ACPI_IS_ASCII(c) ((c) < 0x80) /* @@ -430,6 +370,10 @@ acpi_status acpi_ut_execute_CID(struct acpi_namespace_node *device_node, struct acpi_pnp_device_id_list ** return_cid_list); +acpi_status +acpi_ut_execute_CLS(struct acpi_namespace_node *device_node, + struct acpi_pnp_device_id **return_id); + /* * utlock - reader/writer locks */ diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c index 43b40de90484..20de148594fd 100644 --- a/drivers/acpi/acpica/dsfield.c +++ b/drivers/acpi/acpica/dsfield.c @@ -502,7 +502,7 @@ acpi_ds_create_field(union acpi_parse_object *op, } } - ACPI_MEMSET(&info, 0, sizeof(struct acpi_create_field_info)); + memset(&info, 0, sizeof(struct acpi_create_field_info)); /* Second arg is the field flags */ diff --git a/drivers/acpi/acpica/dsinit.c b/drivers/acpi/acpica/dsinit.c index bbe74bcebbae..95779e8ec3bb 100644 --- a/drivers/acpi/acpica/dsinit.c +++ b/drivers/acpi/acpica/dsinit.c @@ -207,7 +207,7 @@ acpi_ds_initialize_objects(u32 table_index, /* Set all init info to zero */ - ACPI_MEMSET(&info, 0, sizeof(struct acpi_init_walk_info)); + memset(&info, 0, sizeof(struct acpi_init_walk_info)); info.owner_id = owner_id; info.table_index = table_index; diff --git a/drivers/acpi/acpica/dsobject.c b/drivers/acpi/acpica/dsobject.c index 8a7b07b6adc8..2beb7fd674ae 100644 --- a/drivers/acpi/acpica/dsobject.c +++ b/drivers/acpi/acpica/dsobject.c @@ -339,8 +339,8 @@ acpi_ds_build_internal_buffer_obj(struct acpi_walk_state *walk_state, /* Initialize buffer from the byte_list (if present) */ if (byte_list) { - ACPI_MEMCPY(obj_desc->buffer.pointer, - byte_list->named.data, byte_list_length); + memcpy(obj_desc->buffer.pointer, byte_list->named.data, + byte_list_length); } } @@ -750,8 +750,7 @@ acpi_ds_init_object_from_op(struct acpi_walk_state *walk_state, case ACPI_TYPE_STRING: obj_desc->string.pointer = op->common.value.string; - obj_desc->string.length = - (u32) ACPI_STRLEN(op->common.value.string); + obj_desc->string.length = (u32)strlen(op->common.value.string); /* * The string is contained in the ACPI table, don't ever try diff --git a/drivers/acpi/acpica/dsutils.c b/drivers/acpi/acpica/dsutils.c index deeddd6d2f05..ebc577baeaf9 100644 --- a/drivers/acpi/acpica/dsutils.c +++ b/drivers/acpi/acpica/dsutils.c @@ -572,8 +572,8 @@ acpi_ds_create_operand(struct acpi_walk_state *walk_state, obj_desc = acpi_ut_create_string_object((acpi_size) name_length); - ACPI_STRNCPY(obj_desc->string.pointer, - name_string, name_length); + strncpy(obj_desc->string.pointer, + name_string, name_length); status = AE_OK; } else { /* diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c index 843942fb4be5..845ff44919c3 100644 --- a/drivers/acpi/acpica/dswload.c +++ b/drivers/acpi/acpica/dswload.c @@ -315,10 +315,19 @@ acpi_ds_load1_begin_op(struct acpi_walk_state * walk_state, flags = ACPI_NS_NO_UPSEARCH; if ((walk_state->opcode != AML_SCOPE_OP) && (!(walk_state->parse_flags & ACPI_PARSE_DEFERRED_OP))) { - flags |= ACPI_NS_ERROR_IF_FOUND; - ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, - "[%s] Cannot already exist\n", - acpi_ut_get_type_name(object_type))); + if (walk_state->namespace_override) { + flags |= ACPI_NS_OVERRIDE_IF_FOUND; + ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, + "[%s] Override allowed\n", + acpi_ut_get_type_name + (object_type))); + } else { + flags |= ACPI_NS_ERROR_IF_FOUND; + ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, + "[%s] Cannot already exist\n", + acpi_ut_get_type_name + (object_type))); + } } else { ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "[%s] Both Find or Create allowed\n", diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c index 8840296d5b20..ea4c0d3fca2d 100644 --- a/drivers/acpi/acpica/evgpeinit.c +++ b/drivers/acpi/acpica/evgpeinit.c @@ -377,7 +377,7 @@ acpi_ev_match_gpe_method(acpi_handle obj_handle, /* 4) The last two characters of the name are the hex GPE Number */ - gpe_number = ACPI_STRTOUL(&name[2], NULL, 16); + gpe_number = strtoul(&name[2], NULL, 16); if (gpe_number == ACPI_UINT32_MAX) { /* Conversion failed; invalid method, just ignore it */ diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c index 6e0df2b9d5a4..24a4c5c2b124 100644 --- a/drivers/acpi/acpica/exconfig.c +++ b/drivers/acpi/acpica/exconfig.c @@ -470,7 +470,7 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc, return_ACPI_STATUS(AE_NO_MEMORY); } - ACPI_MEMCPY(table, table_header, length); + memcpy(table, table_header, length); break; default: diff --git a/drivers/acpi/acpica/exconvrt.c b/drivers/acpi/acpica/exconvrt.c index 89a976b4ccf2..075d654c837f 100644 --- a/drivers/acpi/acpica/exconvrt.c +++ b/drivers/acpi/acpica/exconvrt.c @@ -227,9 +227,8 @@ acpi_ex_convert_to_buffer(union acpi_operand_object *obj_desc, /* Copy the integer to the buffer, LSB first */ new_buf = return_desc->buffer.pointer; - ACPI_MEMCPY(new_buf, - &obj_desc->integer.value, - acpi_gbl_integer_byte_width); + memcpy(new_buf, + &obj_desc->integer.value, acpi_gbl_integer_byte_width); break; case ACPI_TYPE_STRING: @@ -252,8 +251,8 @@ acpi_ex_convert_to_buffer(union acpi_operand_object *obj_desc, /* Copy the string to the buffer */ new_buf = return_desc->buffer.pointer; - ACPI_STRNCPY((char *)new_buf, (char *)obj_desc->string.pointer, - obj_desc->string.length); + strncpy((char *)new_buf, (char *)obj_desc->string.pointer, + obj_desc->string.length); break; default: diff --git a/drivers/acpi/acpica/exdebug.c b/drivers/acpi/acpica/exdebug.c index e67d0aca3fe6..815442bbd051 100644 --- a/drivers/acpi/acpica/exdebug.c +++ b/drivers/acpi/acpica/exdebug.c @@ -76,6 +76,8 @@ acpi_ex_do_debug_object(union acpi_operand_object *source_desc, { u32 i; u32 timer; + union acpi_operand_object *object_desc; + u32 value; ACPI_FUNCTION_TRACE_PTR(ex_do_debug_object, source_desc); @@ -254,8 +256,44 @@ acpi_ex_do_debug_object(union acpi_operand_object *source_desc, object)->object, level + 4, 0); } else { - acpi_ex_do_debug_object(source_desc->reference. - object, level + 4, 0); + object_desc = source_desc->reference.object; + value = source_desc->reference.value; + + switch (object_desc->common.type) { + case ACPI_TYPE_BUFFER: + + acpi_os_printf("Buffer[%u] = 0x%2.2X\n", + value, + *source_desc->reference. + index_pointer); + break; + + case ACPI_TYPE_STRING: + + acpi_os_printf + ("String[%u] = \"%c\" (0x%2.2X)\n", + value, + *source_desc->reference. + index_pointer, + *source_desc->reference. + index_pointer); + break; + + case ACPI_TYPE_PACKAGE: + + acpi_os_printf("Package[%u] = ", value); + acpi_ex_do_debug_object(*source_desc-> + reference.where, + level + 4, 0); + break; + + default: + + acpi_os_printf + ("Unknown Reference object type %X\n", + object_desc->common.type); + break; + } } } break; diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c index 1da52bef632e..401e7edcd419 100644 --- a/drivers/acpi/acpica/exdump.c +++ b/drivers/acpi/acpica/exdump.c @@ -224,7 +224,7 @@ static struct acpi_exdump_info acpi_ex_dump_index_field[5] = { {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(index_field.data_obj), "Data Object"} }; -static struct acpi_exdump_info acpi_ex_dump_reference[8] = { +static struct acpi_exdump_info acpi_ex_dump_reference[9] = { {ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE(acpi_ex_dump_reference), NULL}, {ACPI_EXD_UINT8, ACPI_EXD_OFFSET(reference.class), "Class"}, {ACPI_EXD_UINT8, ACPI_EXD_OFFSET(reference.target_type), "Target Type"}, @@ -232,6 +232,8 @@ static struct acpi_exdump_info acpi_ex_dump_reference[8] = { {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(reference.object), "Object Desc"}, {ACPI_EXD_NODE, ACPI_EXD_OFFSET(reference.node), "Node"}, {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(reference.where), "Where"}, + {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(reference.index_pointer), + "Index Pointer"}, {ACPI_EXD_REFERENCE, 0, NULL} }; @@ -1005,14 +1007,13 @@ static void acpi_ex_dump_reference_obj(union acpi_operand_object *obj_desc) } else if (obj_desc->reference.object) { if (ACPI_GET_DESCRIPTOR_TYPE(obj_desc) == ACPI_DESC_TYPE_OPERAND) { - acpi_os_printf(" Target: %p", + acpi_os_printf("%22s %p", "Target :", obj_desc->reference.object); if (obj_desc->reference.class == ACPI_REFCLASS_TABLE) { acpi_os_printf(" Table Index: %X\n", obj_desc->reference.value); } else { - acpi_os_printf(" Target: %p [%s]\n", - obj_desc->reference.object, + acpi_os_printf(" [%s]\n", acpi_ut_get_type_name(((union acpi_operand_object *) diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c index c161dd974f74..61fd9c7b88bc 100644 --- a/drivers/acpi/acpica/exfield.c +++ b/drivers/acpi/acpica/exfield.c @@ -428,7 +428,7 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc, } buffer = buffer_desc->buffer.pointer; - ACPI_MEMCPY(buffer, source_desc->buffer.pointer, length); + memcpy(buffer, source_desc->buffer.pointer, length); /* Lock entire transaction if requested */ diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c index 725a3746a2df..70b7bbbb860b 100644 --- a/drivers/acpi/acpica/exfldio.c +++ b/drivers/acpi/acpica/exfldio.c @@ -416,22 +416,22 @@ acpi_ex_field_datum_io(union acpi_operand_object *obj_desc, * Copy the data from the source buffer. * Length is the field width in bytes. */ - ACPI_MEMCPY(value, - (obj_desc->buffer_field.buffer_obj)->buffer. - pointer + - obj_desc->buffer_field.base_byte_offset + - field_datum_byte_offset, - obj_desc->common_field.access_byte_width); + memcpy(value, + (obj_desc->buffer_field.buffer_obj)->buffer. + pointer + + obj_desc->buffer_field.base_byte_offset + + field_datum_byte_offset, + obj_desc->common_field.access_byte_width); } else { /* * Copy the data to the target buffer. * Length is the field width in bytes. */ - ACPI_MEMCPY((obj_desc->buffer_field.buffer_obj)->buffer. - pointer + - obj_desc->buffer_field.base_byte_offset + - field_datum_byte_offset, value, - obj_desc->common_field.access_byte_width); + memcpy((obj_desc->buffer_field.buffer_obj)->buffer. + pointer + + obj_desc->buffer_field.base_byte_offset + + field_datum_byte_offset, value, + obj_desc->common_field.access_byte_width); } status = AE_OK; @@ -703,7 +703,7 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc, return_ACPI_STATUS(AE_BUFFER_OVERFLOW); } - ACPI_MEMSET(buffer, 0, buffer_length); + memset(buffer, 0, buffer_length); access_bit_width = ACPI_MUL_8(obj_desc->common_field.access_byte_width); /* Handle the simple case here */ @@ -720,7 +720,7 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc, status = acpi_ex_field_datum_io(obj_desc, 0, &raw_datum, ACPI_READ); - ACPI_MEMCPY(buffer, &raw_datum, buffer_length); + memcpy(buffer, &raw_datum, buffer_length); } return_ACPI_STATUS(status); @@ -793,9 +793,9 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc, /* Write merged datum to target buffer */ - ACPI_MEMCPY(((char *)buffer) + buffer_offset, &merged_datum, - ACPI_MIN(obj_desc->common_field.access_byte_width, - buffer_length - buffer_offset)); + memcpy(((char *)buffer) + buffer_offset, &merged_datum, + ACPI_MIN(obj_desc->common_field.access_byte_width, + buffer_length - buffer_offset)); buffer_offset += obj_desc->common_field.access_byte_width; merged_datum = @@ -811,9 +811,9 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc, /* Write the last datum to the buffer */ - ACPI_MEMCPY(((char *)buffer) + buffer_offset, &merged_datum, - ACPI_MIN(obj_desc->common_field.access_byte_width, - buffer_length - buffer_offset)); + memcpy(((char *)buffer) + buffer_offset, &merged_datum, + ACPI_MIN(obj_desc->common_field.access_byte_width, + buffer_length - buffer_offset)); return_ACPI_STATUS(AE_OK); } @@ -878,7 +878,7 @@ acpi_ex_insert_into_field(union acpi_operand_object *obj_desc, * at Byte zero. All unused (upper) bytes of the * buffer will be 0. */ - ACPI_MEMCPY((char *)new_buffer, (char *)buffer, buffer_length); + memcpy((char *)new_buffer, (char *)buffer, buffer_length); buffer = new_buffer; buffer_length = required_length; } @@ -918,9 +918,9 @@ acpi_ex_insert_into_field(union acpi_operand_object *obj_desc, /* Get initial Datum from the input buffer */ - ACPI_MEMCPY(&raw_datum, buffer, - ACPI_MIN(obj_desc->common_field.access_byte_width, - buffer_length - buffer_offset)); + memcpy(&raw_datum, buffer, + ACPI_MIN(obj_desc->common_field.access_byte_width, + buffer_length - buffer_offset)); merged_datum = raw_datum << obj_desc->common_field.start_field_bit_offset; @@ -970,9 +970,9 @@ acpi_ex_insert_into_field(union acpi_operand_object *obj_desc, /* Get the next input datum from the buffer */ buffer_offset += obj_desc->common_field.access_byte_width; - ACPI_MEMCPY(&raw_datum, ((char *)buffer) + buffer_offset, - ACPI_MIN(obj_desc->common_field.access_byte_width, - buffer_length - buffer_offset)); + memcpy(&raw_datum, ((char *)buffer) + buffer_offset, + ACPI_MIN(obj_desc->common_field.access_byte_width, + buffer_length - buffer_offset)); merged_datum |= raw_datum << obj_desc->common_field.start_field_bit_offset; diff --git a/drivers/acpi/acpica/exmisc.c b/drivers/acpi/acpica/exmisc.c index b56fc9d6f48e..d02afece0f10 100644 --- a/drivers/acpi/acpica/exmisc.c +++ b/drivers/acpi/acpica/exmisc.c @@ -209,8 +209,8 @@ acpi_ex_concat_template(union acpi_operand_object *operand0, * end_tag descriptor is copied from Operand1. */ new_buf = return_desc->buffer.pointer; - ACPI_MEMCPY(new_buf, operand0->buffer.pointer, length0); - ACPI_MEMCPY(new_buf + length0, operand1->buffer.pointer, length1); + memcpy(new_buf, operand0->buffer.pointer, length0); + memcpy(new_buf + length0, operand1->buffer.pointer, length1); /* Insert end_tag and set the checksum to zero, means "ignore checksum" */ @@ -318,14 +318,14 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0, /* Copy the first integer, LSB first */ - ACPI_MEMCPY(new_buf, &operand0->integer.value, - acpi_gbl_integer_byte_width); + memcpy(new_buf, &operand0->integer.value, + acpi_gbl_integer_byte_width); /* Copy the second integer (LSB first) after the first */ - ACPI_MEMCPY(new_buf + acpi_gbl_integer_byte_width, - &local_operand1->integer.value, - acpi_gbl_integer_byte_width); + memcpy(new_buf + acpi_gbl_integer_byte_width, + &local_operand1->integer.value, + acpi_gbl_integer_byte_width); break; case ACPI_TYPE_STRING: @@ -346,9 +346,9 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0, /* Concatenate the strings */ - ACPI_STRCPY(new_buf, operand0->string.pointer); - ACPI_STRCPY(new_buf + operand0->string.length, - local_operand1->string.pointer); + strcpy(new_buf, operand0->string.pointer); + strcpy(new_buf + operand0->string.length, + local_operand1->string.pointer); break; case ACPI_TYPE_BUFFER: @@ -369,11 +369,11 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0, /* Concatenate the buffers */ - ACPI_MEMCPY(new_buf, operand0->buffer.pointer, - operand0->buffer.length); - ACPI_MEMCPY(new_buf + operand0->buffer.length, - local_operand1->buffer.pointer, - local_operand1->buffer.length); + memcpy(new_buf, operand0->buffer.pointer, + operand0->buffer.length); + memcpy(new_buf + operand0->buffer.length, + local_operand1->buffer.pointer, + local_operand1->buffer.length); break; default: @@ -660,9 +660,9 @@ acpi_ex_do_logical_op(u16 opcode, /* Lexicographic compare: compare the data bytes */ - compare = ACPI_MEMCMP(operand0->buffer.pointer, - local_operand1->buffer.pointer, - (length0 > length1) ? length1 : length0); + compare = memcmp(operand0->buffer.pointer, + local_operand1->buffer.pointer, + (length0 > length1) ? length1 : length0); switch (opcode) { case AML_LEQUAL_OP: /* LEqual (Operand0, Operand1) */ diff --git a/drivers/acpi/acpica/exnames.c b/drivers/acpi/acpica/exnames.c index 453b00c30177..20e87813c7d7 100644 --- a/drivers/acpi/acpica/exnames.c +++ b/drivers/acpi/acpica/exnames.c @@ -192,7 +192,7 @@ static acpi_status acpi_ex_name_segment(u8 ** in_aml_address, char *name_string) char_buf[4] = '\0'; if (name_string) { - ACPI_STRCAT(name_string, char_buf); + strcat(name_string, char_buf); ACPI_DEBUG_PRINT((ACPI_DB_NAMES, "Appended to - %s\n", name_string)); } else { diff --git a/drivers/acpi/acpica/exoparg2.c b/drivers/acpi/acpica/exoparg2.c index fcc618aa2061..b8944ebb1081 100644 --- a/drivers/acpi/acpica/exoparg2.c +++ b/drivers/acpi/acpica/exoparg2.c @@ -337,8 +337,8 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state) * Copy the raw buffer data with no transform. * (NULL terminated already) */ - ACPI_MEMCPY(return_desc->string.pointer, - operand[0]->buffer.pointer, length); + memcpy(return_desc->string.pointer, + operand[0]->buffer.pointer, length); break; case AML_CONCAT_RES_OP: @@ -380,6 +380,8 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state) return_desc->reference.target_type = ACPI_TYPE_BUFFER_FIELD; + return_desc->reference.index_pointer = + &(operand[0]->buffer.pointer[index]); break; case ACPI_TYPE_BUFFER: @@ -391,6 +393,8 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state) return_desc->reference.target_type = ACPI_TYPE_BUFFER_FIELD; + return_desc->reference.index_pointer = + &(operand[0]->buffer.pointer[index]); break; case ACPI_TYPE_PACKAGE: diff --git a/drivers/acpi/acpica/exoparg3.c b/drivers/acpi/acpica/exoparg3.c index 1c64a988cbee..fa100b3b92ee 100644 --- a/drivers/acpi/acpica/exoparg3.c +++ b/drivers/acpi/acpica/exoparg3.c @@ -237,8 +237,8 @@ acpi_status acpi_ex_opcode_3A_1T_1R(struct acpi_walk_state *walk_state) /* We have a buffer, copy the portion requested */ - ACPI_MEMCPY(buffer, operand[0]->string.pointer + index, - length); + memcpy(buffer, operand[0]->string.pointer + index, + length); } /* Set the length of the new String/Buffer */ diff --git a/drivers/acpi/acpica/exregion.c b/drivers/acpi/acpica/exregion.c index f6c2f5499935..b4a5e44c00dd 100644 --- a/drivers/acpi/acpica/exregion.c +++ b/drivers/acpi/acpica/exregion.c @@ -517,15 +517,14 @@ acpi_ex_data_table_space_handler(u32 function, switch (function) { case ACPI_READ: - ACPI_MEMCPY(ACPI_CAST_PTR(char, value), - ACPI_PHYSADDR_TO_PTR(address), - ACPI_DIV_8(bit_width)); + memcpy(ACPI_CAST_PTR(char, value), + ACPI_PHYSADDR_TO_PTR(address), ACPI_DIV_8(bit_width)); break; case ACPI_WRITE: - ACPI_MEMCPY(ACPI_PHYSADDR_TO_PTR(address), - ACPI_CAST_PTR(char, value), ACPI_DIV_8(bit_width)); + memcpy(ACPI_PHYSADDR_TO_PTR(address), + ACPI_CAST_PTR(char, value), ACPI_DIV_8(bit_width)); break; default: diff --git a/drivers/acpi/acpica/exstorob.c b/drivers/acpi/acpica/exstorob.c index 6fa3c8d8fc5f..e1d4f4d51b97 100644 --- a/drivers/acpi/acpica/exstorob.c +++ b/drivers/acpi/acpica/exstorob.c @@ -100,9 +100,9 @@ acpi_ex_store_buffer_to_buffer(union acpi_operand_object *source_desc, /* Clear existing buffer and copy in the new one */ - ACPI_MEMSET(target_desc->buffer.pointer, 0, - target_desc->buffer.length); - ACPI_MEMCPY(target_desc->buffer.pointer, buffer, length); + memset(target_desc->buffer.pointer, 0, + target_desc->buffer.length); + memcpy(target_desc->buffer.pointer, buffer, length); #ifdef ACPI_OBSOLETE_BEHAVIOR /* @@ -129,8 +129,8 @@ acpi_ex_store_buffer_to_buffer(union acpi_operand_object *source_desc, } else { /* Truncate the source, copy only what will fit */ - ACPI_MEMCPY(target_desc->buffer.pointer, buffer, - target_desc->buffer.length); + memcpy(target_desc->buffer.pointer, buffer, + target_desc->buffer.length); ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Truncating source buffer from %X to %X\n", @@ -187,9 +187,9 @@ acpi_ex_store_string_to_string(union acpi_operand_object *source_desc, * String will fit in existing non-static buffer. * Clear old string and copy in the new one */ - ACPI_MEMSET(target_desc->string.pointer, 0, - (acpi_size) target_desc->string.length + 1); - ACPI_MEMCPY(target_desc->string.pointer, buffer, length); + memset(target_desc->string.pointer, 0, + (acpi_size) target_desc->string.length + 1); + memcpy(target_desc->string.pointer, buffer, length); } else { /* * Free the current buffer, then allocate a new buffer @@ -210,7 +210,7 @@ acpi_ex_store_string_to_string(union acpi_operand_object *source_desc, } target_desc->common.flags &= ~AOPOBJ_STATIC_POINTER; - ACPI_MEMCPY(target_desc->string.pointer, buffer, length); + memcpy(target_desc->string.pointer, buffer, length); } /* Set the new target length */ diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c index 3f4225e95d93..30c3f464fda5 100644 --- a/drivers/acpi/acpica/exutils.c +++ b/drivers/acpi/acpica/exutils.c @@ -380,6 +380,38 @@ void acpi_ex_integer_to_string(char *out_string, u64 value) /******************************************************************************* * + * FUNCTION: acpi_ex_pci_cls_to_string + * + * PARAMETERS: out_string - Where to put the converted string (7 bytes) + * PARAMETERS: class_code - PCI class code to be converted (3 bytes) + * + * RETURN: None + * + * DESCRIPTION: Convert 3-bytes PCI class code to string representation. + * Return buffer must be large enough to hold the string. The + * string returned is always exactly of length + * ACPI_PCICLS_STRING_SIZE (includes null terminator). + * + ******************************************************************************/ + +void acpi_ex_pci_cls_to_string(char *out_string, u8 class_code[3]) +{ + + ACPI_FUNCTION_ENTRY(); + + /* All 3 bytes are hexadecimal */ + + out_string[0] = acpi_ut_hex_to_ascii_char((u64)class_code[0], 4); + out_string[1] = acpi_ut_hex_to_ascii_char((u64)class_code[0], 0); + out_string[2] = acpi_ut_hex_to_ascii_char((u64)class_code[1], 4); + out_string[3] = acpi_ut_hex_to_ascii_char((u64)class_code[1], 0); + out_string[4] = acpi_ut_hex_to_ascii_char((u64)class_code[2], 4); + out_string[5] = acpi_ut_hex_to_ascii_char((u64)class_code[2], 0); + out_string[6] = 0; +} + +/******************************************************************************* + * * FUNCTION: acpi_is_valid_space_id * * PARAMETERS: space_id - ID to be validated diff --git a/drivers/acpi/acpica/hwxfsleep.c b/drivers/acpi/acpica/hwxfsleep.c index 3b3767698827..52dfd0d050fa 100644 --- a/drivers/acpi/acpica/hwxfsleep.c +++ b/drivers/acpi/acpica/hwxfsleep.c @@ -50,6 +50,13 @@ ACPI_MODULE_NAME("hwxfsleep") /* Local prototypes */ +#if (!ACPI_REDUCED_HARDWARE) +static acpi_status +acpi_hw_set_firmware_waking_vectors(struct acpi_table_facs *facs, + acpi_physical_address physical_address, + acpi_physical_address physical_address64); +#endif + static acpi_status acpi_hw_sleep_dispatch(u8 sleep_state, u32 function_id); /* @@ -72,6 +79,7 @@ static struct acpi_sleep_functions acpi_sleep_dispatch[] = { /* * These functions are removed for the ACPI_REDUCED_HARDWARE case: + * acpi_set_firmware_waking_vectors * acpi_set_firmware_waking_vector * acpi_set_firmware_waking_vector64 * acpi_enter_sleep_state_s4bios @@ -80,20 +88,26 @@ static struct acpi_sleep_functions acpi_sleep_dispatch[] = { #if (!ACPI_REDUCED_HARDWARE) /******************************************************************************* * - * FUNCTION: acpi_set_firmware_waking_vector + * FUNCTION: acpi_hw_set_firmware_waking_vectors * - * PARAMETERS: physical_address - 32-bit physical address of ACPI real mode + * PARAMETERS: facs - Pointer to FACS table + * physical_address - 32-bit physical address of ACPI real mode * entry point. + * physical_address64 - 64-bit physical address of ACPI protected + * mode entry point. * * RETURN: Status * - * DESCRIPTION: Sets the 32-bit firmware_waking_vector field of the FACS + * DESCRIPTION: Sets the firmware_waking_vector fields of the FACS * ******************************************************************************/ -acpi_status acpi_set_firmware_waking_vector(u32 physical_address) +static acpi_status +acpi_hw_set_firmware_waking_vectors(struct acpi_table_facs *facs, + acpi_physical_address physical_address, + acpi_physical_address physical_address64) { - ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector); + ACPI_FUNCTION_TRACE(acpi_hw_set_firmware_waking_vectors); /* @@ -106,17 +120,92 @@ acpi_status acpi_set_firmware_waking_vector(u32 physical_address) /* Set the 32-bit vector */ - acpi_gbl_FACS->firmware_waking_vector = physical_address; + facs->firmware_waking_vector = (u32)physical_address; - /* Clear the 64-bit vector if it exists */ + if (facs->length > 32) { + if (facs->version >= 1) { - if ((acpi_gbl_FACS->length > 32) && (acpi_gbl_FACS->version >= 1)) { - acpi_gbl_FACS->xfirmware_waking_vector = 0; + /* Set the 64-bit vector */ + + facs->xfirmware_waking_vector = physical_address64; + } else { + /* Clear the 64-bit vector if it exists */ + + facs->xfirmware_waking_vector = 0; + } } return_ACPI_STATUS(AE_OK); } +/******************************************************************************* + * + * FUNCTION: acpi_set_firmware_waking_vectors + * + * PARAMETERS: physical_address - 32-bit physical address of ACPI real mode + * entry point. + * physical_address64 - 64-bit physical address of ACPI protected + * mode entry point. + * + * RETURN: Status + * + * DESCRIPTION: Sets the firmware_waking_vector fields of the FACS + * + ******************************************************************************/ + +acpi_status +acpi_set_firmware_waking_vectors(acpi_physical_address physical_address, + acpi_physical_address physical_address64) +{ + + ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vectors); + + /* If Hardware Reduced flag is set, there is no FACS */ + + if (acpi_gbl_reduced_hardware) { + return_ACPI_STATUS (AE_OK); + } + + if (acpi_gbl_facs32) { + (void)acpi_hw_set_firmware_waking_vectors(acpi_gbl_facs32, + physical_address, + physical_address64); + } + if (acpi_gbl_facs64) { + (void)acpi_hw_set_firmware_waking_vectors(acpi_gbl_facs64, + physical_address, + physical_address64); + } + + return_ACPI_STATUS(AE_OK); +} + +ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vectors) + +/******************************************************************************* + * + * FUNCTION: acpi_set_firmware_waking_vector + * + * PARAMETERS: physical_address - 32-bit physical address of ACPI real mode + * entry point. + * + * RETURN: Status + * + * DESCRIPTION: Sets the 32-bit firmware_waking_vector field of the FACS + * + ******************************************************************************/ +acpi_status acpi_set_firmware_waking_vector(u32 physical_address) +{ + acpi_status status; + + ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector); + + status = acpi_set_firmware_waking_vectors((acpi_physical_address) + physical_address, 0); + + return_ACPI_STATUS(status); +} + ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector) #if ACPI_MACHINE_WIDTH == 64 @@ -136,25 +225,19 @@ ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector) ******************************************************************************/ acpi_status acpi_set_firmware_waking_vector64(u64 physical_address) { - ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector64); - - - /* Determine if the 64-bit vector actually exists */ + acpi_status status; - if ((acpi_gbl_FACS->length <= 32) || (acpi_gbl_FACS->version < 1)) { - return_ACPI_STATUS(AE_NOT_EXIST); - } + ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector64); - /* Clear 32-bit vector, set the 64-bit X_ vector */ + status = acpi_set_firmware_waking_vectors(0, + (acpi_physical_address) + physical_address); - acpi_gbl_FACS->firmware_waking_vector = 0; - acpi_gbl_FACS->xfirmware_waking_vector = physical_address; - return_ACPI_STATUS(AE_OK); + return_ACPI_STATUS(status); } ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector64) #endif - /******************************************************************************* * * FUNCTION: acpi_enter_sleep_state_s4bios diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c index 24fa19a76d70..c687b9979fb2 100644 --- a/drivers/acpi/acpica/nsaccess.c +++ b/drivers/acpi/acpica/nsaccess.c @@ -102,7 +102,7 @@ acpi_status acpi_ns_root_initialize(void) /* _OSI is optional for now, will be permanent later */ - if (!ACPI_STRCMP(init_val->name, "_OSI") + if (!strcmp(init_val->name, "_OSI") && !acpi_gbl_create_osi_method) { continue; } @@ -180,7 +180,7 @@ acpi_status acpi_ns_root_initialize(void) /* Build an object around the static string */ - obj_desc->string.length = (u32)ACPI_STRLEN(val); + obj_desc->string.length = (u32)strlen(val); obj_desc->string.pointer = val; obj_desc->common.flags |= AOPOBJ_STATIC_POINTER; break; @@ -203,7 +203,7 @@ acpi_status acpi_ns_root_initialize(void) /* Special case for ACPI Global Lock */ - if (ACPI_STRCMP(init_val->name, "_GL_") == 0) { + if (strcmp(init_val->name, "_GL_") == 0) { acpi_gbl_global_lock_mutex = obj_desc; /* Create additional counting semaphore for global lock */ @@ -304,7 +304,9 @@ acpi_ns_lookup(union acpi_generic_state *scope_info, return_ACPI_STATUS(AE_BAD_PARAMETER); } - local_flags = flags & ~(ACPI_NS_ERROR_IF_FOUND | ACPI_NS_SEARCH_PARENT); + local_flags = flags & + ~(ACPI_NS_ERROR_IF_FOUND | ACPI_NS_OVERRIDE_IF_FOUND | + ACPI_NS_SEARCH_PARENT); *return_node = ACPI_ENTRY_NOT_FOUND; acpi_gbl_ns_lookup_count++; @@ -547,6 +549,12 @@ acpi_ns_lookup(union acpi_generic_state *scope_info, if (flags & ACPI_NS_ERROR_IF_FOUND) { local_flags |= ACPI_NS_ERROR_IF_FOUND; } + + /* Set override flag according to caller */ + + if (flags & ACPI_NS_OVERRIDE_IF_FOUND) { + local_flags |= ACPI_NS_OVERRIDE_IF_FOUND; + } } /* Extract one ACPI name from the front of the pathname */ diff --git a/drivers/acpi/acpica/nsconvert.c b/drivers/acpi/acpica/nsconvert.c index 1a8b39c8d969..da55a1c60da1 100644 --- a/drivers/acpi/acpica/nsconvert.c +++ b/drivers/acpi/acpica/nsconvert.c @@ -187,8 +187,8 @@ acpi_ns_convert_to_string(union acpi_operand_object *original_object, * Copy the raw buffer data with no transform. String is already NULL * terminated at Length+1. */ - ACPI_MEMCPY(new_object->string.pointer, - original_object->buffer.pointer, length); + memcpy(new_object->string.pointer, + original_object->buffer.pointer, length); break; default: @@ -251,9 +251,9 @@ acpi_ns_convert_to_buffer(union acpi_operand_object *original_object, return (AE_NO_MEMORY); } - ACPI_MEMCPY(new_object->buffer.pointer, - original_object->string.pointer, - original_object->string.length); + memcpy(new_object->buffer.pointer, + original_object->string.pointer, + original_object->string.length); break; case ACPI_TYPE_PACKAGE: diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c index d259393505fa..0f1daba640e7 100644 --- a/drivers/acpi/acpica/nsdump.c +++ b/drivers/acpi/acpica/nsdump.c @@ -101,7 +101,7 @@ void acpi_ns_print_pathname(u32 num_segments, char *pathname) while (num_segments) { for (i = 0; i < 4; i++) { - ACPI_IS_PRINT(pathname[i]) ? + isprint((int)pathname[i]) ? acpi_os_printf("%c", pathname[i]) : acpi_os_printf("?"); } diff --git a/drivers/acpi/acpica/nseval.c b/drivers/acpi/acpica/nseval.c index 7bcc68f57afa..80670cb32b5a 100644 --- a/drivers/acpi/acpica/nseval.c +++ b/drivers/acpi/acpica/nseval.c @@ -59,15 +59,14 @@ acpi_ns_exec_module_code(union acpi_operand_object *method_obj, * * FUNCTION: acpi_ns_evaluate * - * PARAMETERS: info - Evaluation info block, contains: + * PARAMETERS: info - Evaluation info block, contains these fields + * and more: * prefix_node - Prefix or Method/Object Node to execute * relative_path - Name of method to execute, If NULL, the * Node is the object to execute * parameters - List of parameters to pass to the method, * terminated by NULL. Params itself may be * NULL if no parameters are being passed. - * return_object - Where to put method's return value (if - * any). If NULL, no value is returned. * parameter_type - Type of Parameter list * return_object - Where to put method's return value (if * any). If NULL, no value is returned. @@ -440,7 +439,7 @@ acpi_ns_exec_module_code(union acpi_operand_object *method_obj, /* Initialize the evaluation information block */ - ACPI_MEMSET(info, 0, sizeof(struct acpi_evaluate_info)); + memset(info, 0, sizeof(struct acpi_evaluate_info)); info->prefix_node = parent_node; /* diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c index 4a85c4517988..b744a53618eb 100644 --- a/drivers/acpi/acpica/nsinit.c +++ b/drivers/acpi/acpica/nsinit.c @@ -90,7 +90,7 @@ acpi_status acpi_ns_initialize_objects(void) /* Set all init info to zero */ - ACPI_MEMSET(&info, 0, sizeof(struct acpi_init_walk_info)); + memset(&info, 0, sizeof(struct acpi_init_walk_info)); /* Walk entire namespace from the supplied root */ @@ -566,7 +566,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle, ACPI_DEBUG_EXEC(acpi_ut_display_init_pathname (ACPI_TYPE_METHOD, device_node, METHOD_NAME__INI)); - ACPI_MEMSET(info, 0, sizeof(struct acpi_evaluate_info)); + memset(info, 0, sizeof(struct acpi_evaluate_info)); info->prefix_node = device_node; info->relative_pathname = METHOD_NAME__INI; info->parameters = NULL; diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c index c95a119767b5..57a4cfe547e4 100644 --- a/drivers/acpi/acpica/nsparse.c +++ b/drivers/acpi/acpica/nsparse.c @@ -117,6 +117,13 @@ acpi_ns_one_complete_parse(u32 pass_number, (u8) pass_number); } + /* Found OSDT table, enable the namespace override feature */ + + if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_OSDT) && + pass_number == ACPI_IMODE_LOAD_PASS1) { + walk_state->namespace_override = TRUE; + } + if (ACPI_FAILURE(status)) { acpi_ds_delete_walk_state(walk_state); goto cleanup; diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index c30672d23878..0515a70f42a4 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -580,7 +580,7 @@ acpi_ns_repair_HID(struct acpi_evaluate_info *info, * # is a hex digit. */ for (dest = new_string->string.pointer; *source; dest++, source++) { - *dest = (char)ACPI_TOUPPER(*source); + *dest = (char)toupper((int)*source); } acpi_ut_remove_reference(return_object); diff --git a/drivers/acpi/acpica/nssearch.c b/drivers/acpi/acpica/nssearch.c index 4a9d4a66016e..d73904013830 100644 --- a/drivers/acpi/acpica/nssearch.c +++ b/drivers/acpi/acpica/nssearch.c @@ -325,8 +325,41 @@ acpi_ns_search_and_enter(u32 target_name, * If we found it AND the request specifies that a find is an error, * return the error */ - if ((status == AE_OK) && (flags & ACPI_NS_ERROR_IF_FOUND)) { - status = AE_ALREADY_EXISTS; + if (status == AE_OK) { + + /* The node was found in the namespace */ + + /* + * If the namespace override feature is enabled for this node, + * delete any existing attached sub-object and make the node + * look like a new node that is owned by the override table. + */ + if (flags & ACPI_NS_OVERRIDE_IF_FOUND) { + ACPI_DEBUG_PRINT((ACPI_DB_NAMES, + "Namespace override: %4.4s pass %u type %X Owner %X\n", + ACPI_CAST_PTR(char, + &target_name), + interpreter_mode, + (*return_node)->type, + walk_state->owner_id)); + + acpi_ns_delete_children(*return_node); + if (acpi_gbl_runtime_namespace_override) { + acpi_ut_remove_reference((*return_node)->object); + (*return_node)->object = NULL; + (*return_node)->owner_id = + walk_state->owner_id; + } else { + acpi_ns_remove_node(*return_node); + *return_node = ACPI_ENTRY_NOT_FOUND; + } + } + + /* Return an error if we don't expect to find the object */ + + else if (flags & ACPI_NS_ERROR_IF_FOUND) { + status = AE_ALREADY_EXISTS; + } } #ifdef ACPI_ASL_COMPILER if (*return_node && (*return_node)->type == ACPI_TYPE_ANY) { diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c index 6ad02008c0c2..8d8104b8bd28 100644 --- a/drivers/acpi/acpica/nsutils.c +++ b/drivers/acpi/acpica/nsutils.c @@ -292,8 +292,7 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info) } else { /* Convert the character to uppercase and save it */ - result[i] = - (char)ACPI_TOUPPER((int)*external_name); + result[i] = (char)toupper((int)*external_name); external_name++; } } diff --git a/drivers/acpi/acpica/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c index b6030a2deee1..6ee1e52b903d 100644 --- a/drivers/acpi/acpica/nsxfeval.c +++ b/drivers/acpi/acpica/nsxfeval.c @@ -696,7 +696,7 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, return (AE_CTRL_DEPTH); } - no_match = ACPI_STRCMP(hid->string, info->hid); + no_match = strcmp(hid->string, info->hid); ACPI_FREE(hid); if (no_match) { @@ -715,8 +715,7 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, found = FALSE; for (i = 0; i < cid->count; i++) { - if (ACPI_STRCMP(cid->ids[i].string, info->hid) - == 0) { + if (strcmp(cid->ids[i].string, info->hid) == 0) { /* Found a matching CID */ diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c index d66c326485d8..9ff643b9553f 100644 --- a/drivers/acpi/acpica/nsxfname.c +++ b/drivers/acpi/acpica/nsxfname.c @@ -114,7 +114,7 @@ acpi_get_handle(acpi_handle parent, /* Special case for root-only, since we can't search for it */ - if (!ACPI_STRCMP(pathname, ACPI_NS_ROOT_PATH)) { + if (!strcmp(pathname, ACPI_NS_ROOT_PATH)) { *ret_handle = ACPI_CAST_PTR(acpi_handle, acpi_gbl_root_node); return (AE_OK); @@ -242,7 +242,7 @@ static char *acpi_ns_copy_device_id(struct acpi_pnp_device_id *dest, /* Copy actual string and return a pointer to the next string area */ - ACPI_MEMCPY(string_area, source->string, source->length); + memcpy(string_area, source->string, source->length); return (string_area + source->length); } @@ -260,7 +260,7 @@ static char *acpi_ns_copy_device_id(struct acpi_pnp_device_id *dest, * control methods (Such as in the case of a device.) * * For Device and Processor objects, run the Device _HID, _UID, _CID, _SUB, - * _STA, _ADR, _sx_w, and _sx_d methods. + * _CLS, _STA, _ADR, _sx_w, and _sx_d methods. * * Note: Allocates the return buffer, must be freed by the caller. * @@ -276,11 +276,12 @@ acpi_get_object_info(acpi_handle handle, struct acpi_pnp_device_id *hid = NULL; struct acpi_pnp_device_id *uid = NULL; struct acpi_pnp_device_id *sub = NULL; + struct acpi_pnp_device_id *cls = NULL; char *next_id_string; acpi_object_type type; acpi_name name; u8 param_count = 0; - u8 valid = 0; + u16 valid = 0; u32 info_size; u32 i; acpi_status status; @@ -320,7 +321,7 @@ acpi_get_object_info(acpi_handle handle, if ((type == ACPI_TYPE_DEVICE) || (type == ACPI_TYPE_PROCESSOR)) { /* * Get extra info for ACPI Device/Processor objects only: - * Run the Device _HID, _UID, _SUB, and _CID methods. + * Run the Device _HID, _UID, _SUB, _CID, and _CLS methods. * * Note: none of these methods are required, so they may or may * not be present for this device. The Info->Valid bitfield is used @@ -363,6 +364,14 @@ acpi_get_object_info(acpi_handle handle, sizeof(struct acpi_pnp_device_id_list)); valid |= ACPI_VALID_CID; } + + /* Execute the Device._CLS method */ + + status = acpi_ut_execute_CLS(node, &cls); + if (ACPI_SUCCESS(status)) { + info_size += cls->length; + valid |= ACPI_VALID_CLS; + } } /* @@ -486,6 +495,11 @@ acpi_get_object_info(acpi_handle handle, } } + if (cls) { + next_id_string = acpi_ns_copy_device_id(&info->class_code, + cls, next_id_string); + } + /* Copy the fixed-length data */ info->info_size = info_size; @@ -510,6 +524,9 @@ cleanup: if (cid_list) { ACPI_FREE(cid_list); } + if (cls) { + ACPI_FREE(cls); + } return (status); } @@ -620,7 +637,7 @@ acpi_status acpi_install_method(u8 *buffer) /* Copy the method AML to the local buffer */ - ACPI_MEMCPY(aml_buffer, aml_start, aml_length); + memcpy(aml_buffer, aml_start, aml_length); /* Initialize the method object with the new method's information */ diff --git a/drivers/acpi/acpica/psutils.c b/drivers/acpi/acpica/psutils.c index 960505ab409a..32440912023a 100644 --- a/drivers/acpi/acpica/psutils.c +++ b/drivers/acpi/acpica/psutils.c @@ -93,10 +93,9 @@ void acpi_ps_init_op(union acpi_parse_object *op, u16 opcode) op->common.descriptor_type = ACPI_DESC_TYPE_PARSER; op->common.aml_opcode = opcode; - ACPI_DISASM_ONLY_MEMBERS(ACPI_STRNCPY(op->common.aml_op_name, - (acpi_ps_get_opcode_info - (opcode))->name, - sizeof(op->common.aml_op_name))); + ACPI_DISASM_ONLY_MEMBERS(strncpy(op->common.aml_op_name, + (acpi_ps_get_opcode_info(opcode))-> + name, sizeof(op->common.aml_op_name))); } /******************************************************************************* diff --git a/drivers/acpi/acpica/rscreate.c b/drivers/acpi/acpica/rscreate.c index 15434e4c9b34..3fa829e96c2a 100644 --- a/drivers/acpi/acpica/rscreate.c +++ b/drivers/acpi/acpica/rscreate.c @@ -353,13 +353,13 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, /* +1 to include null terminator */ user_prt->length += - (u32) ACPI_STRLEN(user_prt->source) + 1; + (u32)strlen(user_prt->source) + 1; break; case ACPI_TYPE_STRING: - ACPI_STRCPY(user_prt->source, - obj_desc->string.pointer); + strcpy(user_prt->source, + obj_desc->string.pointer); /* * Add to the Length field the length of the string diff --git a/drivers/acpi/acpica/rsmisc.c b/drivers/acpi/acpica/rsmisc.c index 1fe49d223663..ac37852e0821 100644 --- a/drivers/acpi/acpica/rsmisc.c +++ b/drivers/acpi/acpica/rsmisc.c @@ -119,7 +119,7 @@ acpi_rs_convert_aml_to_resource(struct acpi_resource *resource, /* * Get the resource type and the initial (minimum) length */ - ACPI_MEMSET(resource, 0, INIT_RESOURCE_LENGTH(info)); + memset(resource, 0, INIT_RESOURCE_LENGTH(info)); resource->type = INIT_RESOURCE_TYPE(info); resource->length = INIT_RESOURCE_LENGTH(info); break; @@ -324,13 +324,13 @@ acpi_rs_convert_aml_to_resource(struct acpi_resource *resource, case ACPI_RSC_SET8: - ACPI_MEMSET(destination, info->aml_offset, info->value); + memset(destination, info->aml_offset, info->value); break; case ACPI_RSC_DATA8: target = ACPI_ADD_PTR(char, resource, info->value); - ACPI_MEMCPY(destination, source, ACPI_GET16(target)); + memcpy(destination, source, ACPI_GET16(target)); break; case ACPI_RSC_ADDRESS: @@ -502,7 +502,7 @@ acpi_rs_convert_resource_to_aml(struct acpi_resource *resource, switch (info->opcode) { case ACPI_RSC_INITSET: - ACPI_MEMSET(aml, 0, INIT_RESOURCE_LENGTH(info)); + memset(aml, 0, INIT_RESOURCE_LENGTH(info)); aml_length = INIT_RESOURCE_LENGTH(info); acpi_rs_set_resource_header(INIT_RESOURCE_TYPE(info), aml_length, aml); diff --git a/drivers/acpi/acpica/rsutils.c b/drivers/acpi/acpica/rsutils.c index ece3cd60cc6a..52b024df0052 100644 --- a/drivers/acpi/acpica/rsutils.c +++ b/drivers/acpi/acpica/rsutils.c @@ -148,7 +148,7 @@ acpi_rs_move_data(void *destination, void *source, u16 item_count, u8 move_type) case ACPI_RSC_MOVE_SERIAL_VEN: case ACPI_RSC_MOVE_SERIAL_RES: - ACPI_MEMCPY(destination, source, item_count); + memcpy(destination, source, item_count); return; /* @@ -364,12 +364,11 @@ acpi_rs_get_resource_source(acpi_rs_length resource_length, * Zero the entire area of the buffer. */ total_length = - (u32) - ACPI_STRLEN(ACPI_CAST_PTR(char, &aml_resource_source[1])) + + (u32)strlen(ACPI_CAST_PTR(char, &aml_resource_source[1])) + 1; - total_length = (u32) ACPI_ROUND_UP_TO_NATIVE_WORD(total_length); + total_length = (u32)ACPI_ROUND_UP_TO_NATIVE_WORD(total_length); - ACPI_MEMSET(resource_source->string_ptr, 0, total_length); + memset(resource_source->string_ptr, 0, total_length); /* Copy the resource_source string to the destination */ @@ -432,8 +431,8 @@ acpi_rs_set_resource_source(union aml_resource * aml, /* Copy the resource_source string */ - ACPI_STRCPY(ACPI_CAST_PTR(char, &aml_resource_source[1]), - resource_source->string_ptr); + strcpy(ACPI_CAST_PTR(char, &aml_resource_source[1]), + resource_source->string_ptr); /* * Add the length of the string (+ 1 for null terminator) to the diff --git a/drivers/acpi/acpica/rsxface.c b/drivers/acpi/acpica/rsxface.c index 8e6276df0226..de51f836ef68 100644 --- a/drivers/acpi/acpica/rsxface.c +++ b/drivers/acpi/acpica/rsxface.c @@ -398,8 +398,8 @@ acpi_resource_to_address64(struct acpi_resource *resource, /* Simple copy for 64 bit source */ - ACPI_MEMCPY(out, &resource->data, - sizeof(struct acpi_resource_address64)); + memcpy(out, &resource->data, + sizeof(struct acpi_resource_address64)); break; default: @@ -499,7 +499,7 @@ acpi_rs_match_vendor_resource(struct acpi_resource *resource, void *context) */ if ((vendor->byte_length < (ACPI_UUID_LENGTH + 1)) || (vendor->uuid_subtype != info->uuid->subtype) || - (ACPI_MEMCMP(vendor->uuid, info->uuid->data, ACPI_UUID_LENGTH))) { + (memcmp(vendor->uuid, info->uuid->data, ACPI_UUID_LENGTH))) { return (AE_OK); } @@ -513,7 +513,7 @@ acpi_rs_match_vendor_resource(struct acpi_resource *resource, void *context) /* Found the correct resource, copy and return it */ - ACPI_MEMCPY(buffer->pointer, resource, resource->length); + memcpy(buffer->pointer, resource, resource->length); buffer->length = resource->length; /* Found the desired descriptor, terminate resource walk */ diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c index d7f8386455bd..5c9d5abf1588 100644 --- a/drivers/acpi/acpica/tbdata.c +++ b/drivers/acpi/acpica/tbdata.c @@ -73,7 +73,7 @@ acpi_tb_init_table_descriptor(struct acpi_table_desc *table_desc, * Initialize the table descriptor. Set the pointer to NULL, since the * table is not fully mapped at this time. */ - ACPI_MEMSET(table_desc, 0, sizeof(struct acpi_table_desc)); + memset(table_desc, 0, sizeof(struct acpi_table_desc)); table_desc->address = address; table_desc->length = table->length; table_desc->flags = flags; @@ -465,9 +465,9 @@ acpi_status acpi_tb_resize_root_table_list(void) /* Copy and free the previous table array */ if (acpi_gbl_root_table_list.tables) { - ACPI_MEMCPY(tables, acpi_gbl_root_table_list.tables, - (acpi_size) table_count * - sizeof(struct acpi_table_desc)); + memcpy(tables, acpi_gbl_root_table_list.tables, + (acpi_size) table_count * + sizeof(struct acpi_table_desc)); if (acpi_gbl_root_table_list.flags & ACPI_ROOT_ORIGIN_ALLOCATED) { ACPI_FREE(acpi_gbl_root_table_list.tables); diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c index 7d2486005e3f..6253001b6375 100644 --- a/drivers/acpi/acpica/tbfadt.c +++ b/drivers/acpi/acpica/tbfadt.c @@ -350,9 +350,18 @@ void acpi_tb_parse_fadt(u32 table_index) /* If Hardware Reduced flag is set, there is no FACS */ if (!acpi_gbl_reduced_hardware) { - acpi_tb_install_fixed_table((acpi_physical_address) - acpi_gbl_FADT.Xfacs, ACPI_SIG_FACS, - ACPI_TABLE_INDEX_FACS); + if (acpi_gbl_FADT.facs) { + acpi_tb_install_fixed_table((acpi_physical_address) + acpi_gbl_FADT.facs, + ACPI_SIG_FACS, + ACPI_TABLE_INDEX_FACS); + } + if (acpi_gbl_FADT.Xfacs) { + acpi_tb_install_fixed_table((acpi_physical_address) + acpi_gbl_FADT.Xfacs, + ACPI_SIG_FACS, + ACPI_TABLE_INDEX_X_FACS); + } } } @@ -389,12 +398,12 @@ void acpi_tb_create_local_fadt(struct acpi_table_header *table, u32 length) /* Clear the entire local FADT */ - ACPI_MEMSET(&acpi_gbl_FADT, 0, sizeof(struct acpi_table_fadt)); + memset(&acpi_gbl_FADT, 0, sizeof(struct acpi_table_fadt)); /* Copy the original FADT, up to sizeof (struct acpi_table_fadt) */ - ACPI_MEMCPY(&acpi_gbl_FADT, table, - ACPI_MIN(length, sizeof(struct acpi_table_fadt))); + memcpy(&acpi_gbl_FADT, table, + ACPI_MIN(length, sizeof(struct acpi_table_fadt))); /* Take a copy of the Hardware Reduced flag */ @@ -491,13 +500,9 @@ static void acpi_tb_convert_fadt(void) acpi_gbl_FADT.header.length = sizeof(struct acpi_table_fadt); /* - * Expand the 32-bit FACS and DSDT addresses to 64-bit as necessary. + * Expand the 32-bit DSDT addresses to 64-bit as necessary. * Later ACPICA code will always use the X 64-bit field. */ - acpi_gbl_FADT.Xfacs = acpi_tb_select_address("FACS", - acpi_gbl_FADT.facs, - acpi_gbl_FADT.Xfacs); - acpi_gbl_FADT.Xdsdt = acpi_tb_select_address("DSDT", acpi_gbl_FADT.dsdt, acpi_gbl_FADT.Xdsdt); diff --git a/drivers/acpi/acpica/tbfind.c b/drivers/acpi/acpica/tbfind.c index 0b879fcfef67..119c84ad9833 100644 --- a/drivers/acpi/acpica/tbfind.c +++ b/drivers/acpi/acpica/tbfind.c @@ -76,16 +76,16 @@ acpi_tb_find_table(char *signature, /* Normalize the input strings */ - ACPI_MEMSET(&header, 0, sizeof(struct acpi_table_header)); + memset(&header, 0, sizeof(struct acpi_table_header)); ACPI_MOVE_NAME(header.signature, signature); - ACPI_STRNCPY(header.oem_id, oem_id, ACPI_OEM_ID_SIZE); - ACPI_STRNCPY(header.oem_table_id, oem_table_id, ACPI_OEM_TABLE_ID_SIZE); + strncpy(header.oem_id, oem_id, ACPI_OEM_ID_SIZE); + strncpy(header.oem_table_id, oem_table_id, ACPI_OEM_TABLE_ID_SIZE); /* Search for the table */ for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) { - if (ACPI_MEMCMP(&(acpi_gbl_root_table_list.tables[i].signature), - header.signature, ACPI_NAME_SIZE)) { + if (memcmp(&(acpi_gbl_root_table_list.tables[i].signature), + header.signature, ACPI_NAME_SIZE)) { /* Not the requested table */ @@ -112,21 +112,20 @@ acpi_tb_find_table(char *signature, /* Check for table match on all IDs */ - if (!ACPI_MEMCMP + if (!memcmp (acpi_gbl_root_table_list.tables[i].pointer->signature, header.signature, ACPI_NAME_SIZE) && (!oem_id[0] || - !ACPI_MEMCMP + !memcmp (acpi_gbl_root_table_list. tables[i].pointer-> oem_id, header.oem_id, ACPI_OEM_ID_SIZE)) && (!oem_table_id[0] - || !ACPI_MEMCMP(acpi_gbl_root_table_list.tables[i]. - pointer->oem_table_id, - header.oem_table_id, - ACPI_OEM_TABLE_ID_SIZE))) { + || !memcmp(acpi_gbl_root_table_list.tables[i].pointer-> + oem_table_id, header.oem_table_id, + ACPI_OEM_TABLE_ID_SIZE))) { *table_index = i; ACPI_DEBUG_PRINT((ACPI_DB_TABLES, diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c index 008a251780f4..15ea98e0068d 100644 --- a/drivers/acpi/acpica/tbinstal.c +++ b/drivers/acpi/acpica/tbinstal.c @@ -87,8 +87,8 @@ acpi_tb_compare_tables(struct acpi_table_desc *table_desc, u32 table_index) * not just the header. */ is_identical = (u8)((table_desc->length != table_length || - ACPI_MEMCMP(table_desc->pointer, table, - table_length)) ? FALSE : TRUE); + memcmp(table_desc->pointer, table, table_length)) ? + FALSE : TRUE); /* Release the acquired table */ @@ -289,8 +289,7 @@ acpi_tb_install_standard_table(acpi_physical_address address, if ((new_table_desc.signature.ascii[0] != 0x00) && (!ACPI_COMPARE_NAME (&new_table_desc.signature, ACPI_SIG_SSDT)) - && (ACPI_STRNCMP(new_table_desc.signature.ascii, "OEM", 3))) - { + && (strncmp(new_table_desc.signature.ascii, "OEM", 3))) { ACPI_BIOS_ERROR((AE_INFO, "Table has invalid signature [%4.4s] (0x%8.8X), " "must be SSDT or OEMx", diff --git a/drivers/acpi/acpica/tbprint.c b/drivers/acpi/acpica/tbprint.c index 77ba5c71c6e7..709d5112fc16 100644 --- a/drivers/acpi/acpica/tbprint.c +++ b/drivers/acpi/acpica/tbprint.c @@ -73,7 +73,7 @@ static void acpi_tb_fix_string(char *string, acpi_size length) { while (length && *string) { - if (!ACPI_IS_PRINT(*string)) { + if (!isprint((int)*string)) { *string = '?'; } string++; @@ -100,7 +100,7 @@ acpi_tb_cleanup_table_header(struct acpi_table_header *out_header, struct acpi_table_header *header) { - ACPI_MEMCPY(out_header, header, sizeof(struct acpi_table_header)); + memcpy(out_header, header, sizeof(struct acpi_table_header)); acpi_tb_fix_string(out_header->signature, ACPI_NAME_SIZE); acpi_tb_fix_string(out_header->oem_id, ACPI_OEM_ID_SIZE); @@ -138,9 +138,9 @@ acpi_tb_print_table_header(acpi_physical_address address, /* RSDP has no common fields */ - ACPI_MEMCPY(local_header.oem_id, - ACPI_CAST_PTR(struct acpi_table_rsdp, - header)->oem_id, ACPI_OEM_ID_SIZE); + memcpy(local_header.oem_id, + ACPI_CAST_PTR(struct acpi_table_rsdp, header)->oem_id, + ACPI_OEM_ID_SIZE); acpi_tb_fix_string(local_header.oem_id, ACPI_OEM_ID_SIZE); ACPI_INFO((AE_INFO, "RSDP 0x%8.8X%8.8X %06X (v%.2d %-6.6s)", diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index 6559a58439c5..568ac0e4a3c6 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -68,7 +68,6 @@ acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size); acpi_status acpi_tb_initialize_facs(void) { - acpi_status status; /* If Hardware Reduced flag is set, there is no FACS */ @@ -77,11 +76,25 @@ acpi_status acpi_tb_initialize_facs(void) return (AE_OK); } - status = acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, - ACPI_CAST_INDIRECT_PTR(struct - acpi_table_header, - &acpi_gbl_FACS)); - return (status); + (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, + ACPI_CAST_INDIRECT_PTR(struct + acpi_table_header, + &acpi_gbl_facs32)); + (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_X_FACS, + ACPI_CAST_INDIRECT_PTR(struct + acpi_table_header, + &acpi_gbl_facs64)); + + if (acpi_gbl_facs64 + && (!acpi_gbl_facs32 || !acpi_gbl_use32_bit_facs_addresses)) { + acpi_gbl_FACS = acpi_gbl_facs64; + } else if (acpi_gbl_facs32) { + acpi_gbl_FACS = acpi_gbl_facs32; + } + + /* If there is no FACS, just continue. There was already an error msg */ + + return (AE_OK); } #endif /* !ACPI_REDUCED_HARDWARE */ @@ -101,7 +114,7 @@ acpi_status acpi_tb_initialize_facs(void) u8 acpi_tb_tables_loaded(void) { - if (acpi_gbl_root_table_list.current_table_count >= 3) { + if (acpi_gbl_root_table_list.current_table_count >= 4) { return (TRUE); } @@ -175,7 +188,7 @@ struct acpi_table_header *acpi_tb_copy_dsdt(u32 table_index) return (NULL); } - ACPI_MEMCPY(new_table, table_desc->pointer, table_desc->length); + memcpy(new_table, table_desc->pointer, table_desc->length); acpi_tb_uninstall_table(table_desc); acpi_tb_init_table_descriptor(&acpi_gbl_root_table_list. @@ -357,11 +370,11 @@ acpi_status __init acpi_tb_parse_root_table(acpi_physical_address rsdp_address) table_entry = ACPI_ADD_PTR(u8, table, sizeof(struct acpi_table_header)); /* - * First two entries in the table array are reserved for the DSDT - * and FACS, which are not actually present in the RSDT/XSDT - they - * come from the FADT + * First three entries in the table array are reserved for the DSDT + * and 32bit/64bit FACS, which are not actually present in the + * RSDT/XSDT - they come from the FADT */ - acpi_gbl_root_table_list.current_table_count = 2; + acpi_gbl_root_table_list.current_table_count = 3; /* Initialize the root table array from the RSDT/XSDT */ diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c index 60e94f87f27a..5559e2c70b15 100644 --- a/drivers/acpi/acpica/tbxface.c +++ b/drivers/acpi/acpica/tbxface.c @@ -119,9 +119,9 @@ acpi_initialize_tables(struct acpi_table_desc * initial_table_array, } else { /* Root Table Array has been statically allocated by the host */ - ACPI_MEMSET(initial_table_array, 0, - (acpi_size) initial_table_count * - sizeof(struct acpi_table_desc)); + memset(initial_table_array, 0, + (acpi_size) initial_table_count * + sizeof(struct acpi_table_desc)); acpi_gbl_root_table_list.tables = initial_table_array; acpi_gbl_root_table_list.max_table_count = initial_table_count; @@ -242,8 +242,9 @@ acpi_get_table_header(char *signature, if (!header) { return (AE_NO_MEMORY); } - ACPI_MEMCPY(out_table_header, header, - sizeof(struct acpi_table_header)); + + memcpy(out_table_header, header, + sizeof(struct acpi_table_header)); acpi_os_unmap_memory(header, sizeof(struct acpi_table_header)); @@ -251,9 +252,9 @@ acpi_get_table_header(char *signature, return (AE_NOT_FOUND); } } else { - ACPI_MEMCPY(out_table_header, - acpi_gbl_root_table_list.tables[i].pointer, - sizeof(struct acpi_table_header)); + memcpy(out_table_header, + acpi_gbl_root_table_list.tables[i].pointer, + sizeof(struct acpi_table_header)); } return (AE_OK); } diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c index aadb3002a2dd..9682d40ca6ff 100644 --- a/drivers/acpi/acpica/tbxfload.c +++ b/drivers/acpi/acpica/tbxfload.c @@ -150,8 +150,8 @@ static acpi_status acpi_tb_load_namespace(void) * Save the original DSDT header for detection of table corruption * and/or replacement of the DSDT from outside the OS. */ - ACPI_MEMCPY(&acpi_gbl_original_dsdt_header, acpi_gbl_DSDT, - sizeof(struct acpi_table_header)); + memcpy(&acpi_gbl_original_dsdt_header, acpi_gbl_DSDT, + sizeof(struct acpi_table_header)); (void)acpi_ut_release_mutex(ACPI_MTX_TABLES); @@ -166,13 +166,18 @@ static acpi_status acpi_tb_load_namespace(void) (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) { - if ((!ACPI_COMPARE_NAME + if (!acpi_gbl_root_table_list.tables[i].address || + (!ACPI_COMPARE_NAME (&(acpi_gbl_root_table_list.tables[i].signature), ACPI_SIG_SSDT) && !ACPI_COMPARE_NAME(& (acpi_gbl_root_table_list.tables[i]. - signature), ACPI_SIG_PSDT)) + signature), ACPI_SIG_PSDT) + && + !ACPI_COMPARE_NAME(& + (acpi_gbl_root_table_list.tables[i]. + signature), ACPI_SIG_OSDT)) || ACPI_FAILURE(acpi_tb_validate_table (&acpi_gbl_root_table_list.tables[i]))) { @@ -219,9 +224,9 @@ acpi_install_table(acpi_physical_address address, u8 physical) ACPI_FUNCTION_TRACE(acpi_install_table); if (physical) { - flags = ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL; - } else { flags = ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL; + } else { + flags = ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL; } status = acpi_tb_install_standard_table(address, flags, diff --git a/drivers/acpi/acpica/utalloc.c b/drivers/acpi/acpica/utalloc.c index 61d8f6d186d1..7a4101f0685e 100644 --- a/drivers/acpi/acpica/utalloc.c +++ b/drivers/acpi/acpica/utalloc.c @@ -73,7 +73,7 @@ void *acpi_os_allocate_zeroed(acpi_size size) /* Clear the memory block */ - ACPI_MEMSET(allocation, 0, size); + memset(allocation, 0, size); } return (allocation); @@ -181,7 +181,7 @@ acpi_status acpi_ut_delete_caches(void) char buffer[7]; if (acpi_gbl_display_final_mem_stats) { - ACPI_STRCPY(buffer, "MEMORY"); + strcpy(buffer, "MEMORY"); (void)acpi_db_display_statistics(buffer); } #endif @@ -337,6 +337,6 @@ acpi_ut_initialize_buffer(struct acpi_buffer * buffer, /* Have a valid buffer, clear it */ - ACPI_MEMSET(buffer->pointer, 0, required_length); + memset(buffer->pointer, 0, required_length); return (AE_OK); } diff --git a/drivers/acpi/acpica/utbuffer.c b/drivers/acpi/acpica/utbuffer.c index a8c39643e618..01c8709ca586 100644 --- a/drivers/acpi/acpica/utbuffer.c +++ b/drivers/acpi/acpica/utbuffer.c @@ -159,7 +159,7 @@ void acpi_ut_dump_buffer(u8 *buffer, u32 count, u32 display, u32 base_offset) } buf_char = buffer[(acpi_size) i + j]; - if (ACPI_IS_PRINT(buf_char)) { + if (isprint(buf_char)) { acpi_os_printf("%c", buf_char); } else { acpi_os_printf("."); @@ -319,7 +319,7 @@ acpi_ut_dump_buffer_to_file(ACPI_FILE file, } buf_char = buffer[(acpi_size) i + j]; - if (ACPI_IS_PRINT(buf_char)) { + if (isprint(buf_char)) { acpi_ut_file_printf(file, "%c", buf_char); } else { acpi_ut_file_printf(file, "."); diff --git a/drivers/acpi/acpica/utcache.c b/drivers/acpi/acpica/utcache.c index eacc5eee362e..0d21fbd99363 100644 --- a/drivers/acpi/acpica/utcache.c +++ b/drivers/acpi/acpica/utcache.c @@ -84,7 +84,7 @@ acpi_os_create_cache(char *cache_name, /* Populate the cache object and return it */ - ACPI_MEMSET(cache, 0, sizeof(struct acpi_memory_list)); + memset(cache, 0, sizeof(struct acpi_memory_list)); cache->list_name = cache_name; cache->object_size = object_size; cache->max_depth = max_depth; @@ -212,7 +212,7 @@ acpi_os_release_object(struct acpi_memory_list * cache, void *object) /* Mark the object as cached */ - ACPI_MEMSET(object, 0xCA, cache->object_size); + memset(object, 0xCA, cache->object_size); ACPI_SET_DESCRIPTOR_TYPE(object, ACPI_DESC_TYPE_CACHED); /* Put the object at the head of the cache list */ @@ -281,7 +281,7 @@ void *acpi_os_acquire_object(struct acpi_memory_list *cache) /* Clear (zero) the previously used Object */ - ACPI_MEMSET(object, 0, cache->object_size); + memset(object, 0, cache->object_size); } else { /* The cache is empty, create a new object */ diff --git a/drivers/acpi/acpica/utcopy.c b/drivers/acpi/acpica/utcopy.c index c37ec5035f4c..257221d452c8 100644 --- a/drivers/acpi/acpica/utcopy.c +++ b/drivers/acpi/acpica/utcopy.c @@ -129,7 +129,7 @@ acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object, /* Always clear the external object */ - ACPI_MEMSET(external_object, 0, sizeof(union acpi_object)); + memset(external_object, 0, sizeof(union acpi_object)); /* * In general, the external object will be the same type as @@ -149,9 +149,9 @@ acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object, string. length + 1); - ACPI_MEMCPY((void *)data_space, - (void *)internal_object->string.pointer, - (acpi_size) internal_object->string.length + 1); + memcpy((void *)data_space, + (void *)internal_object->string.pointer, + (acpi_size) internal_object->string.length + 1); break; case ACPI_TYPE_BUFFER: @@ -162,9 +162,9 @@ acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object, ACPI_ROUND_UP_TO_NATIVE_WORD(internal_object->string. length); - ACPI_MEMCPY((void *)data_space, - (void *)internal_object->buffer.pointer, - internal_object->buffer.length); + memcpy((void *)data_space, + (void *)internal_object->buffer.pointer, + internal_object->buffer.length); break; case ACPI_TYPE_INTEGER: @@ -502,9 +502,9 @@ acpi_ut_copy_esimple_to_isimple(union acpi_object *external_object, goto error_exit; } - ACPI_MEMCPY(internal_object->string.pointer, - external_object->string.pointer, - external_object->string.length); + memcpy(internal_object->string.pointer, + external_object->string.pointer, + external_object->string.length); internal_object->string.length = external_object->string.length; break; @@ -517,9 +517,9 @@ acpi_ut_copy_esimple_to_isimple(union acpi_object *external_object, goto error_exit; } - ACPI_MEMCPY(internal_object->buffer.pointer, - external_object->buffer.pointer, - external_object->buffer.length); + memcpy(internal_object->buffer.pointer, + external_object->buffer.pointer, + external_object->buffer.length); internal_object->buffer.length = external_object->buffer.length; @@ -694,8 +694,8 @@ acpi_ut_copy_simple_object(union acpi_operand_object *source_desc, copy_size = sizeof(struct acpi_namespace_node); } - ACPI_MEMCPY(ACPI_CAST_PTR(char, dest_desc), - ACPI_CAST_PTR(char, source_desc), copy_size); + memcpy(ACPI_CAST_PTR(char, dest_desc), + ACPI_CAST_PTR(char, source_desc), copy_size); /* Restore the saved fields */ @@ -725,9 +725,9 @@ acpi_ut_copy_simple_object(union acpi_operand_object *source_desc, /* Copy the actual buffer data */ - ACPI_MEMCPY(dest_desc->buffer.pointer, - source_desc->buffer.pointer, - source_desc->buffer.length); + memcpy(dest_desc->buffer.pointer, + source_desc->buffer.pointer, + source_desc->buffer.length); } break; @@ -747,9 +747,9 @@ acpi_ut_copy_simple_object(union acpi_operand_object *source_desc, /* Copy the actual string data */ - ACPI_MEMCPY(dest_desc->string.pointer, - source_desc->string.pointer, - (acpi_size) source_desc->string.length + 1); + memcpy(dest_desc->string.pointer, + source_desc->string.pointer, + (acpi_size) source_desc->string.length + 1); } break; diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c index 4f3f888d33bb..cd02693841db 100644 --- a/drivers/acpi/acpica/utdebug.c +++ b/drivers/acpi/acpica/utdebug.c @@ -111,8 +111,8 @@ void acpi_ut_track_stack_ptr(void) * RETURN: Updated pointer to the function name * * DESCRIPTION: Remove the "Acpi" prefix from the function name, if present. - * This allows compiler macros such as __func__ to be used with no - * change to the debug output. + * This allows compiler macros such as __func__ to be used + * with no change to the debug output. * ******************************************************************************/ diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c index 5e8df9177da4..a72685c1e819 100644 --- a/drivers/acpi/acpica/utglobal.c +++ b/drivers/acpi/acpica/utglobal.c @@ -102,12 +102,19 @@ const struct acpi_predefined_names acpi_gbl_pre_defined_names[] = { {"_SB_", ACPI_TYPE_DEVICE, NULL}, {"_SI_", ACPI_TYPE_LOCAL_SCOPE, NULL}, {"_TZ_", ACPI_TYPE_DEVICE, NULL}, - {"_REV", ACPI_TYPE_INTEGER, (char *)ACPI_CA_SUPPORT_LEVEL}, + /* + * March, 2015: + * The _REV object is in the process of being deprecated, because + * other ACPI implementations permanently return 2. Thus, it + * has little or no value. Return 2 for compatibility with + * other ACPI implementations. + */ + {"_REV", ACPI_TYPE_INTEGER, ACPI_CAST_PTR(char, 2)}, {"_OS_", ACPI_TYPE_STRING, ACPI_OS_NAME}, - {"_GL_", ACPI_TYPE_MUTEX, (char *)1}, + {"_GL_", ACPI_TYPE_MUTEX, ACPI_CAST_PTR(char, 1)}, #if !defined (ACPI_NO_METHOD_EXECUTION) || defined (ACPI_CONSTANT_EVAL_ONLY) - {"_OSI", ACPI_TYPE_METHOD, (char *)1}, + {"_OSI", ACPI_TYPE_METHOD, ACPI_CAST_PTR(char, 1)}, #endif /* Table terminator */ diff --git a/drivers/acpi/acpica/utids.c b/drivers/acpi/acpica/utids.c index 27431cfc1c44..7956df1e263c 100644 --- a/drivers/acpi/acpica/utids.c +++ b/drivers/acpi/acpica/utids.c @@ -1,6 +1,6 @@ /****************************************************************************** * - * Module Name: utids - support for device Ids - HID, UID, CID + * Module Name: utids - support for device Ids - HID, UID, CID, SUB, CLS * *****************************************************************************/ @@ -111,7 +111,7 @@ acpi_ut_execute_HID(struct acpi_namespace_node *device_node, if (obj_desc->common.type == ACPI_TYPE_INTEGER) { acpi_ex_eisa_id_to_string(hid->string, obj_desc->integer.value); } else { - ACPI_STRCPY(hid->string, obj_desc->string.pointer); + strcpy(hid->string, obj_desc->string.pointer); } hid->length = length; @@ -180,7 +180,7 @@ acpi_ut_execute_SUB(struct acpi_namespace_node *device_node, /* Simply copy existing string */ - ACPI_STRCPY(sub->string, obj_desc->string.pointer); + strcpy(sub->string, obj_desc->string.pointer); sub->length = length; *return_id = sub; @@ -256,7 +256,7 @@ acpi_ut_execute_UID(struct acpi_namespace_node *device_node, if (obj_desc->common.type == ACPI_TYPE_INTEGER) { acpi_ex_integer_to_string(uid->string, obj_desc->integer.value); } else { - ACPI_STRCPY(uid->string, obj_desc->string.pointer); + strcpy(uid->string, obj_desc->string.pointer); } uid->length = length; @@ -393,8 +393,7 @@ acpi_ut_execute_CID(struct acpi_namespace_node *device_node, /* Copy the String CID from the returned object */ - ACPI_STRCPY(next_id_string, - cid_objects[i]->string.pointer); + strcpy(next_id_string, cid_objects[i]->string.pointer); length = cid_objects[i]->string.length + 1; } @@ -416,3 +415,92 @@ cleanup: acpi_ut_remove_reference(obj_desc); return_ACPI_STATUS(status); } + +/******************************************************************************* + * + * FUNCTION: acpi_ut_execute_CLS + * + * PARAMETERS: device_node - Node for the device + * return_id - Where the _CLS is returned + * + * RETURN: Status + * + * DESCRIPTION: Executes the _CLS control method that returns PCI-defined + * class code of the device. The _CLS value is always a package + * containing PCI class information as a list of integers. + * The returned string has format "BBSSPP", where: + * BB = Base-class code + * SS = Sub-class code + * PP = Programming Interface code + * + ******************************************************************************/ + +acpi_status +acpi_ut_execute_CLS(struct acpi_namespace_node *device_node, + struct acpi_pnp_device_id **return_id) +{ + union acpi_operand_object *obj_desc; + union acpi_operand_object **cls_objects; + u32 count; + struct acpi_pnp_device_id *cls; + u32 length; + acpi_status status; + u8 class_code[3] = { 0, 0, 0 }; + + ACPI_FUNCTION_TRACE(ut_execute_CLS); + + status = acpi_ut_evaluate_object(device_node, METHOD_NAME__CLS, + ACPI_BTYPE_PACKAGE, &obj_desc); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Get the size of the String to be returned, includes null terminator */ + + length = ACPI_PCICLS_STRING_SIZE; + cls_objects = obj_desc->package.elements; + count = obj_desc->package.count; + + if (obj_desc->common.type == ACPI_TYPE_PACKAGE) { + if (count > 0 + && cls_objects[0]->common.type == ACPI_TYPE_INTEGER) { + class_code[0] = (u8)cls_objects[0]->integer.value; + } + if (count > 1 + && cls_objects[1]->common.type == ACPI_TYPE_INTEGER) { + class_code[1] = (u8)cls_objects[1]->integer.value; + } + if (count > 2 + && cls_objects[2]->common.type == ACPI_TYPE_INTEGER) { + class_code[2] = (u8)cls_objects[2]->integer.value; + } + } + + /* Allocate a buffer for the CLS */ + + cls = + ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_pnp_device_id) + + (acpi_size) length); + if (!cls) { + status = AE_NO_MEMORY; + goto cleanup; + } + + /* Area for the string starts after PNP_DEVICE_ID struct */ + + cls->string = + ACPI_ADD_PTR(char, cls, sizeof(struct acpi_pnp_device_id)); + + /* Simply copy existing string */ + + acpi_ex_pci_cls_to_string(cls->string, class_code); + cls->length = length; + *return_id = cls; + +cleanup: + + /* On exit, we must delete the return object */ + + acpi_ut_remove_reference(obj_desc); + return_ACPI_STATUS(status); +} diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c index cbb7034d28d8..71b66537f826 100644 --- a/drivers/acpi/acpica/utmisc.c +++ b/drivers/acpi/acpica/utmisc.c @@ -66,9 +66,9 @@ u8 acpi_ut_is_pci_root_bridge(char *id) * Check if this is a PCI root bridge. * ACPI 3.0+: check for a PCI Express root also. */ - if (!(ACPI_STRCMP(id, - PCI_ROOT_HID_STRING)) || - !(ACPI_STRCMP(id, PCI_EXPRESS_ROOT_HID_STRING))) { + if (!(strcmp(id, + PCI_ROOT_HID_STRING)) || + !(strcmp(id, PCI_EXPRESS_ROOT_HID_STRING))) { return (TRUE); } @@ -97,7 +97,8 @@ u8 acpi_ut_is_aml_table(struct acpi_table_header *table) if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_DSDT) || ACPI_COMPARE_NAME(table->signature, ACPI_SIG_PSDT) || - ACPI_COMPARE_NAME(table->signature, ACPI_SIG_SSDT)) { + ACPI_COMPARE_NAME(table->signature, ACPI_SIG_SSDT) || + ACPI_COMPARE_NAME(table->signature, ACPI_SIG_OSDT)) { return (TRUE); } diff --git a/drivers/acpi/acpica/utosi.c b/drivers/acpi/acpica/utosi.c index 44035abdbf29..8f3d203aed79 100644 --- a/drivers/acpi/acpica/utosi.c +++ b/drivers/acpi/acpica/utosi.c @@ -232,8 +232,7 @@ acpi_status acpi_ut_install_interface(acpi_string interface_name) return (AE_NO_MEMORY); } - interface_info->name = - ACPI_ALLOCATE_ZEROED(ACPI_STRLEN(interface_name) + 1); + interface_info->name = ACPI_ALLOCATE_ZEROED(strlen(interface_name) + 1); if (!interface_info->name) { ACPI_FREE(interface_info); return (AE_NO_MEMORY); @@ -241,7 +240,7 @@ acpi_status acpi_ut_install_interface(acpi_string interface_name) /* Initialize new info and insert at the head of the global list */ - ACPI_STRCPY(interface_info->name, interface_name); + strcpy(interface_info->name, interface_name); interface_info->flags = ACPI_OSI_DYNAMIC; interface_info->next = acpi_gbl_supported_interfaces; @@ -269,7 +268,7 @@ acpi_status acpi_ut_remove_interface(acpi_string interface_name) previous_interface = next_interface = acpi_gbl_supported_interfaces; while (next_interface) { - if (!ACPI_STRCMP(interface_name, next_interface->name)) { + if (!strcmp(interface_name, next_interface->name)) { /* Found: name is in either the static list or was added at runtime */ @@ -373,7 +372,7 @@ struct acpi_interface_info *acpi_ut_get_interface(acpi_string interface_name) next_interface = acpi_gbl_supported_interfaces; while (next_interface) { - if (!ACPI_STRCMP(interface_name, next_interface->name)) { + if (!strcmp(interface_name, next_interface->name)) { return (next_interface); } diff --git a/drivers/acpi/acpica/utpredef.c b/drivers/acpi/acpica/utpredef.c index 29e449935a82..97898ed71b4b 100644 --- a/drivers/acpi/acpica/utpredef.c +++ b/drivers/acpi/acpica/utpredef.c @@ -148,7 +148,7 @@ void acpi_ut_get_expected_return_types(char *buffer, u32 expected_btypes) u32 j; if (!expected_btypes) { - ACPI_STRCPY(buffer, "NONE"); + strcpy(buffer, "NONE"); return; } @@ -161,7 +161,7 @@ void acpi_ut_get_expected_return_types(char *buffer, u32 expected_btypes) /* If one of the expected types, concatenate the name of this type */ if (expected_btypes & this_rtype) { - ACPI_STRCAT(buffer, &ut_rtype_names[i][j]); + strcat(buffer, &ut_rtype_names[i][j]); j = 0; /* Use name separator from now on */ } diff --git a/drivers/acpi/acpica/utprint.c b/drivers/acpi/acpica/utprint.c index 2be6bd4bdc09..b26297c5de49 100644 --- a/drivers/acpi/acpica/utprint.c +++ b/drivers/acpi/acpica/utprint.c @@ -180,7 +180,7 @@ const char *acpi_ut_scan_number(const char *string, u64 *number_ptr) { u64 number = 0; - while (ACPI_IS_DIGIT(*string)) { + while (isdigit((int)*string)) { number *= 10; number += *(string++) - '0'; } @@ -405,7 +405,7 @@ acpi_ut_vsnprintf(char *string, /* Process width */ width = -1; - if (ACPI_IS_DIGIT(*format)) { + if (isdigit((int)*format)) { format = acpi_ut_scan_number(format, &number); width = (s32) number; } else if (*format == '*') { @@ -422,7 +422,7 @@ acpi_ut_vsnprintf(char *string, precision = -1; if (*format == '.') { ++format; - if (ACPI_IS_DIGIT(*format)) { + if (isdigit((int)*format)) { format = acpi_ut_scan_number(format, &number); precision = (s32) number; } else if (*format == '*') { diff --git a/drivers/acpi/acpica/utstring.c b/drivers/acpi/acpica/utstring.c index 83b6c52490dc..8f3c883dfe0e 100644 --- a/drivers/acpi/acpica/utstring.c +++ b/drivers/acpi/acpica/utstring.c @@ -79,7 +79,7 @@ void acpi_ut_strlwr(char *src_string) /* Walk entire string, lowercasing the letters */ for (string = src_string; *string; string++) { - *string = (char)ACPI_TOLOWER(*string); + *string = (char)tolower((int)*string); } return; @@ -145,7 +145,7 @@ void acpi_ut_strupr(char *src_string) /* Walk entire string, uppercasing the letters */ for (string = src_string; *string; string++) { - *string = (char)ACPI_TOUPPER(*string); + *string = (char)toupper((int)*string); } return; @@ -202,7 +202,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) /* Skip over any white space in the buffer */ - while ((*string) && (ACPI_IS_SPACE(*string) || *string == '\t')) { + while ((*string) && (isspace((int)*string) || *string == '\t')) { string++; } @@ -211,7 +211,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) * Base equal to ACPI_ANY_BASE means 'ToInteger operation case'. * We need to determine if it is decimal or hexadecimal. */ - if ((*string == '0') && (ACPI_TOLOWER(*(string + 1)) == 'x')) { + if ((*string == '0') && (tolower((int)*(string + 1)) == 'x')) { sign_of0x = 1; base = 16; @@ -224,7 +224,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) /* Any string left? Check that '0x' is not followed by white space. */ - if (!(*string) || ACPI_IS_SPACE(*string) || *string == '\t') { + if (!(*string) || isspace((int)*string) || *string == '\t') { if (to_integer_op) { goto error_exit; } else { @@ -241,7 +241,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) /* Main loop: convert the string to a 32- or 64-bit integer */ while (*string) { - if (ACPI_IS_DIGIT(*string)) { + if (isdigit((int)*string)) { /* Convert ASCII 0-9 to Decimal value */ @@ -252,8 +252,8 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) term = 1; } else { - this_digit = (u8)ACPI_TOUPPER(*string); - if (ACPI_IS_XDIGIT((char)this_digit)) { + this_digit = (u8)toupper((int)*string); + if (isxdigit((int)this_digit)) { /* Convert ASCII Hex char to value */ @@ -404,7 +404,7 @@ void acpi_ut_print_string(char *string, u16 max_length) /* Check for printable character or hex escape */ - if (ACPI_IS_PRINT(string[i])) { + if (isprint((int)string[i])) { /* This is a normal character */ acpi_os_printf("%c", (int)string[i]); @@ -609,22 +609,22 @@ void ut_convert_backslashes(char *pathname) u8 acpi_ut_safe_strcpy(char *dest, acpi_size dest_size, char *source) { - if (ACPI_STRLEN(source) >= dest_size) { + if (strlen(source) >= dest_size) { return (TRUE); } - ACPI_STRCPY(dest, source); + strcpy(dest, source); return (FALSE); } u8 acpi_ut_safe_strcat(char *dest, acpi_size dest_size, char *source) { - if ((ACPI_STRLEN(dest) + ACPI_STRLEN(source)) >= dest_size) { + if ((strlen(dest) + strlen(source)) >= dest_size) { return (TRUE); } - ACPI_STRCAT(dest, source); + strcat(dest, source); return (FALSE); } @@ -635,14 +635,13 @@ acpi_ut_safe_strncat(char *dest, { acpi_size actual_transfer_length; - actual_transfer_length = - ACPI_MIN(max_transfer_length, ACPI_STRLEN(source)); + actual_transfer_length = ACPI_MIN(max_transfer_length, strlen(source)); - if ((ACPI_STRLEN(dest) + actual_transfer_length) >= dest_size) { + if ((strlen(dest) + actual_transfer_length) >= dest_size) { return (TRUE); } - ACPI_STRNCAT(dest, source, max_transfer_length); + strncat(dest, source, max_transfer_length); return (FALSE); } #endif diff --git a/drivers/acpi/acpica/uttrack.c b/drivers/acpi/acpica/uttrack.c index 130dd9f96f0f..9a7dc8196a5d 100644 --- a/drivers/acpi/acpica/uttrack.c +++ b/drivers/acpi/acpica/uttrack.c @@ -100,7 +100,7 @@ acpi_ut_create_list(char *list_name, return (AE_NO_MEMORY); } - ACPI_MEMSET(cache, 0, sizeof(struct acpi_memory_list)); + memset(cache, 0, sizeof(struct acpi_memory_list)); cache->list_name = list_name; cache->object_size = object_size; @@ -402,7 +402,7 @@ acpi_ut_track_allocation(struct acpi_debug_mem_block *allocation, allocation->component = component; allocation->line = line; - ACPI_STRNCPY(allocation->module, module, ACPI_MAX_MODULE_NAME); + strncpy(allocation->module, module, ACPI_MAX_MODULE_NAME); allocation->module[ACPI_MAX_MODULE_NAME - 1] = 0; if (!element) { @@ -497,7 +497,7 @@ acpi_ut_remove_allocation(struct acpi_debug_mem_block *allocation, /* Mark the segment as deleted */ - ACPI_MEMSET(&allocation->user_space, 0xEA, allocation->size); + memset(&allocation->user_space, 0xEA, allocation->size); status = acpi_ut_release_mutex(ACPI_MTX_MEMORY); return (status); @@ -595,7 +595,7 @@ void acpi_ut_dump_allocations(u32 component, const char *module) while (element) { if ((element->component & component) && ((module == NULL) - || (0 == ACPI_STRCMP(module, element->module)))) { + || (0 == strcmp(module, element->module)))) { descriptor = ACPI_CAST_PTR(union acpi_descriptor, &element->user_space); diff --git a/drivers/acpi/acpica/utxface.c b/drivers/acpi/acpica/utxface.c index 0929187bdce0..51cf52d52243 100644 --- a/drivers/acpi/acpica/utxface.c +++ b/drivers/acpi/acpica/utxface.c @@ -234,8 +234,8 @@ acpi_status acpi_get_statistics(struct acpi_statistics *stats) stats->sci_count = acpi_sci_count; stats->gpe_count = acpi_gpe_count; - ACPI_MEMCPY(stats->fixed_event_count, acpi_fixed_event_count, - sizeof(acpi_fixed_event_count)); + memcpy(stats->fixed_event_count, acpi_fixed_event_count, + sizeof(acpi_fixed_event_count)); /* Other counters */ @@ -322,7 +322,7 @@ acpi_status acpi_install_interface(acpi_string interface_name) /* Parameter validation */ - if (!interface_name || (ACPI_STRLEN(interface_name) == 0)) { + if (!interface_name || (strlen(interface_name) == 0)) { return (AE_BAD_PARAMETER); } @@ -374,7 +374,7 @@ acpi_status acpi_remove_interface(acpi_string interface_name) /* Parameter validation */ - if (!interface_name || (ACPI_STRLEN(interface_name) == 0)) { + if (!interface_name || (strlen(interface_name) == 0)) { return (AE_BAD_PARAMETER); } diff --git a/drivers/acpi/acpica/utxfinit.c b/drivers/acpi/acpica/utxfinit.c index 083a76891889..42a32a66ef22 100644 --- a/drivers/acpi/acpica/utxfinit.c +++ b/drivers/acpi/acpica/utxfinit.c @@ -179,10 +179,12 @@ acpi_status __init acpi_enable_subsystem(u32 flags) * Obtain a permanent mapping for the FACS. This is required for the * Global Lock and the Firmware Waking Vector */ - status = acpi_tb_initialize_facs(); - if (ACPI_FAILURE(status)) { - ACPI_WARNING((AE_INFO, "Could not map the FACS table")); - return_ACPI_STATUS(status); + if (!(flags & ACPI_NO_FACS_INIT)) { + status = acpi_tb_initialize_facs(); + if (ACPI_FAILURE(status)) { + ACPI_WARNING((AE_INFO, "Could not map the FACS table")); + return_ACPI_STATUS(status); + } } #endif /* !ACPI_REDUCED_HARDWARE */ diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index 1d1791935c31..278dc4be992a 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -162,6 +162,15 @@ static int __init dmi_disable_osi_win8(const struct dmi_system_id *d) acpi_osi_setup("!Windows 2012"); return 0; } +#ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE +static int __init dmi_enable_rev_override(const struct dmi_system_id *d) +{ + printk(KERN_NOTICE PREFIX "DMI detected: %s (force ACPI _REV to 5)\n", + d->ident); + acpi_rev_override_setup(NULL); + return 0; +} +#endif static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { { @@ -325,6 +334,23 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "1015PX"), }, }, + +#ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE + /* + * DELL XPS 13 (2015) switches sound between HDA and I2S + * depending on the ACPI _REV callback. If userspace supports + * I2S sufficiently (or if you do not care about sound), you + * can safely disable this quirk. + */ + { + .callback = dmi_enable_rev_override, + .ident = "DELL XPS 13 (2015)", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9343"), + }, + }, +#endif {} }; diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 787c629bc9b4..4683a96932b9 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -58,6 +58,7 @@ void acpi_cmos_rtc_init(void); #else static inline void acpi_cmos_rtc_init(void) {} #endif +int acpi_rev_override_setup(char *str); extern bool acpi_force_hot_remove; diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index a5dc9034efee..c262e4acd68d 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -530,6 +530,19 @@ acpi_os_get_physical_address(void *virt, acpi_physical_address * phys) } #endif +#ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE +static bool acpi_rev_override; + +int __init acpi_rev_override_setup(char *str) +{ + acpi_rev_override = true; + return 1; +} +__setup("acpi_rev_override", acpi_rev_override_setup); +#else +#define acpi_rev_override false +#endif + #define ACPI_MAX_OVERRIDE_LEN 100 static char acpi_os_name[ACPI_MAX_OVERRIDE_LEN]; @@ -548,6 +561,11 @@ acpi_os_predefined_override(const struct acpi_predefined_names *init_val, *new_val = acpi_os_name; } + if (!memcmp(init_val->name, "_REV", 4) && acpi_rev_override) { + printk(KERN_INFO PREFIX "Overriding _REV return value to 5\n"); + *new_val = (char *)5; + } + return AE_OK; } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ec6c5c6e1ac9..d94529d5c8e9 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -346,6 +346,7 @@ struct rbd_device { struct rbd_image_header header; unsigned long flags; /* possibly lock protected */ struct rbd_spec *spec; + struct rbd_options *opts; char *header_name; @@ -724,34 +725,36 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) } /* - * mount options + * (Per device) rbd map options */ enum { + Opt_queue_depth, Opt_last_int, /* int args above */ Opt_last_string, /* string args above */ Opt_read_only, Opt_read_write, - /* Boolean args above */ - Opt_last_bool, + Opt_err }; static match_table_t rbd_opts_tokens = { + {Opt_queue_depth, "queue_depth=%d"}, /* int args above */ /* string args above */ {Opt_read_only, "read_only"}, {Opt_read_only, "ro"}, /* Alternate spelling */ {Opt_read_write, "read_write"}, {Opt_read_write, "rw"}, /* Alternate spelling */ - /* Boolean args above */ - {-1, NULL} + {Opt_err, NULL} }; struct rbd_options { + int queue_depth; bool read_only; }; +#define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ #define RBD_READ_ONLY_DEFAULT false static int parse_rbd_opts_token(char *c, void *private) @@ -761,27 +764,27 @@ static int parse_rbd_opts_token(char *c, void *private) int token, intval, ret; token = match_token(c, rbd_opts_tokens, argstr); - if (token < 0) - return -EINVAL; - if (token < Opt_last_int) { ret = match_int(&argstr[0], &intval); if (ret < 0) { - pr_err("bad mount option arg (not int) " - "at '%s'\n", c); + pr_err("bad mount option arg (not int) at '%s'\n", c); return ret; } dout("got int token %d val %d\n", token, intval); } else if (token > Opt_last_int && token < Opt_last_string) { - dout("got string token %d val %s\n", token, - argstr[0].from); - } else if (token > Opt_last_string && token < Opt_last_bool) { - dout("got Boolean token %d\n", token); + dout("got string token %d val %s\n", token, argstr[0].from); } else { dout("got token %d\n", token); } switch (token) { + case Opt_queue_depth: + if (intval < 1) { + pr_err("queue_depth out of range\n"); + return -EINVAL; + } + rbd_opts->queue_depth = intval; + break; case Opt_read_only: rbd_opts->read_only = true; break; @@ -789,9 +792,10 @@ static int parse_rbd_opts_token(char *c, void *private) rbd_opts->read_only = false; break; default: - rbd_assert(false); - break; + /* libceph prints "bad option" msg */ + return -EINVAL; } + return 0; } @@ -1563,22 +1567,39 @@ static void rbd_obj_request_end(struct rbd_obj_request *obj_request) /* * Wait for an object request to complete. If interrupted, cancel the * underlying osd request. + * + * @timeout: in jiffies, 0 means "wait forever" */ -static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) +static int __rbd_obj_request_wait(struct rbd_obj_request *obj_request, + unsigned long timeout) { - int ret; + long ret; dout("%s %p\n", __func__, obj_request); - - ret = wait_for_completion_interruptible(&obj_request->completion); - if (ret < 0) { - dout("%s %p interrupted\n", __func__, obj_request); + ret = wait_for_completion_interruptible_timeout( + &obj_request->completion, + ceph_timeout_jiffies(timeout)); + if (ret <= 0) { + if (ret == 0) + ret = -ETIMEDOUT; rbd_obj_request_end(obj_request); - return ret; + } else { + ret = 0; } - dout("%s %p done\n", __func__, obj_request); - return 0; + dout("%s %p ret %d\n", __func__, obj_request, (int)ret); + return ret; +} + +static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) +{ + return __rbd_obj_request_wait(obj_request, 0); +} + +static int rbd_obj_request_wait_timeout(struct rbd_obj_request *obj_request, + unsigned long timeout) +{ + return __rbd_obj_request_wait(obj_request, timeout); } static void rbd_img_request_complete(struct rbd_img_request *img_request) @@ -2001,11 +2022,11 @@ static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, rbd_assert(obj_request_type_valid(type)); size = strlen(object_name) + 1; - name = kmalloc(size, GFP_KERNEL); + name = kmalloc(size, GFP_NOIO); if (!name) return NULL; - obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_KERNEL); + obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO); if (!obj_request) { kfree(name); return NULL; @@ -2376,7 +2397,7 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request, } if (opcode == CEPH_OSD_OP_DELETE) - osd_req_op_init(osd_request, num_ops, opcode); + osd_req_op_init(osd_request, num_ops, opcode, 0); else osd_req_op_extent_init(osd_request, num_ops, opcode, offset, length, 0, 0); @@ -2848,7 +2869,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request) goto out; stat_request->callback = rbd_img_obj_exists_callback; - osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT); + osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0); osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0, false, false); rbd_osd_req_format_read(stat_request); @@ -3122,6 +3143,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper( bool watch) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct ceph_options *opts = osdc->client->options; struct rbd_obj_request *obj_request; int ret; @@ -3148,7 +3170,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper( if (ret) goto out; - ret = rbd_obj_request_wait(obj_request); + ret = rbd_obj_request_wait_timeout(obj_request, opts->mount_timeout); if (ret) goto out; @@ -3750,10 +3772,9 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set)); rbd_dev->tag_set.ops = &rbd_mq_ops; - rbd_dev->tag_set.queue_depth = BLKDEV_MAX_RQ; + rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth; rbd_dev->tag_set.numa_node = NUMA_NO_NODE; - rbd_dev->tag_set.flags = - BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; + rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; rbd_dev->tag_set.nr_hw_queues = 1; rbd_dev->tag_set.cmd_size = sizeof(struct work_struct); @@ -3773,6 +3794,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) /* set io sizes to object size */ segment_size = rbd_obj_bytes(&rbd_dev->header); blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); + blk_queue_max_segments(q, segment_size / SECTOR_SIZE); blk_queue_max_segment_size(q, segment_size); blk_queue_io_min(q, segment_size); blk_queue_io_opt(q, segment_size); @@ -4044,7 +4066,8 @@ static void rbd_spec_free(struct kref *kref) } static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, - struct rbd_spec *spec) + struct rbd_spec *spec, + struct rbd_options *opts) { struct rbd_device *rbd_dev; @@ -4058,8 +4081,9 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, INIT_LIST_HEAD(&rbd_dev->node); init_rwsem(&rbd_dev->header_rwsem); - rbd_dev->spec = spec; rbd_dev->rbd_client = rbdc; + rbd_dev->spec = spec; + rbd_dev->opts = opts; /* Initialize the layout used for all rbd requests */ @@ -4075,6 +4099,7 @@ static void rbd_dev_destroy(struct rbd_device *rbd_dev) { rbd_put_client(rbd_dev->rbd_client); rbd_spec_put(rbd_dev->spec); + kfree(rbd_dev->opts); kfree(rbd_dev); } @@ -4933,6 +4958,7 @@ static int rbd_add_parse_args(const char *buf, goto out_mem; rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; + rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; copts = ceph_parse_options(options, mon_addrs, mon_addrs + mon_addrs_size - 1, @@ -4963,8 +4989,8 @@ out_err: */ static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name) { + struct ceph_options *opts = rbdc->client->options; u64 newest_epoch; - unsigned long timeout = rbdc->client->options->mount_timeout * HZ; int tries = 0; int ret; @@ -4979,7 +5005,8 @@ again: if (rbdc->client->osdc.osdmap->epoch < newest_epoch) { ceph_monc_request_next_osdmap(&rbdc->client->monc); (void) ceph_monc_wait_osdmap(&rbdc->client->monc, - newest_epoch, timeout); + newest_epoch, + opts->mount_timeout); goto again; } else { /* the osdmap we have is new enough */ @@ -5148,7 +5175,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) rbdc = __rbd_get_client(rbd_dev->rbd_client); ret = -ENOMEM; - parent = rbd_dev_create(rbdc, parent_spec); + parent = rbd_dev_create(rbdc, parent_spec, NULL); if (!parent) goto out_err; @@ -5394,9 +5421,6 @@ static ssize_t do_rbd_add(struct bus_type *bus, rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); if (rc < 0) goto err_out_module; - read_only = rbd_opts->read_only; - kfree(rbd_opts); - rbd_opts = NULL; /* done with this */ rbdc = rbd_get_client(ceph_opts); if (IS_ERR(rbdc)) { @@ -5422,11 +5446,12 @@ static ssize_t do_rbd_add(struct bus_type *bus, goto err_out_client; } - rbd_dev = rbd_dev_create(rbdc, spec); + rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts); if (!rbd_dev) goto err_out_client; rbdc = NULL; /* rbd_dev now owns this */ spec = NULL; /* rbd_dev now owns this */ + rbd_opts = NULL; /* rbd_dev now owns this */ rc = rbd_dev_image_probe(rbd_dev, true); if (rc < 0) @@ -5434,6 +5459,7 @@ static ssize_t do_rbd_add(struct bus_type *bus, /* If we are mapping a snapshot it must be marked read-only */ + read_only = rbd_dev->opts->read_only; if (rbd_dev->spec->snap_id != CEPH_NOSNAP) read_only = true; rbd_dev->mapping.read_only = read_only; @@ -5458,6 +5484,7 @@ err_out_client: rbd_put_client(rbdc); err_out_args: rbd_spec_put(spec); + kfree(rbd_opts); err_out_module: module_put(THIS_MODULE); diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 0b4188b9af7c..c6dea3f6917b 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -581,7 +581,7 @@ static inline int needs_ilk_vtd_wa(void) /* Query intel_iommu to see if we need the workaround. Presumably that * was loaded first. */ - if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB || + if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG || gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG) && intel_iommu_gfx_mapped) return 1; diff --git a/drivers/clk/clk-max77686.c b/drivers/clk/clk-max77686.c index 86cdb3a28629..446c2fe76dc2 100644 --- a/drivers/clk/clk-max77686.c +++ b/drivers/clk/clk-max77686.c @@ -23,6 +23,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/err.h> +#include <linux/module.h> #include <linux/platform_device.h> #include <linux/mfd/max77686.h> #include <linux/mfd/max77686-private.h> diff --git a/drivers/clk/clk-max77802.c b/drivers/clk/clk-max77802.c index 0729dc723a8f..74c49b93a6eb 100644 --- a/drivers/clk/clk-max77802.c +++ b/drivers/clk/clk-max77802.c @@ -22,6 +22,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/err.h> +#include <linux/module.h> #include <linux/platform_device.h> #include <linux/mfd/max77686-private.h> #include <linux/clk-provider.h> diff --git a/drivers/clk/clk-nomadik.c b/drivers/clk/clk-nomadik.c index 05e04ce0f148..c9487179f25f 100644 --- a/drivers/clk/clk-nomadik.c +++ b/drivers/clk/clk-nomadik.c @@ -503,8 +503,7 @@ static int __init nomadik_src_clk_init_debugfs(void) NULL, NULL, &nomadik_src_clk_debugfs_ops); return 0; } - -module_init(nomadik_src_clk_init_debugfs); +device_initcall(nomadik_src_clk_init_debugfs); #endif diff --git a/drivers/clk/sunxi/clk-mod0.c b/drivers/clk/sunxi/clk-mod0.c index ec8f5a1fca09..9d028aec58e5 100644 --- a/drivers/clk/sunxi/clk-mod0.c +++ b/drivers/clk/sunxi/clk-mod0.c @@ -128,7 +128,7 @@ static struct platform_driver sun4i_a10_mod0_clk_driver = { }, .probe = sun4i_a10_mod0_clk_probe, }; -module_platform_driver(sun4i_a10_mod0_clk_driver); +builtin_platform_driver(sun4i_a10_mod0_clk_driver); static const struct factors_data sun9i_a80_mod0_data __initconst = { .enable = 31, diff --git a/drivers/cpufreq/exynos-cpufreq.c b/drivers/cpufreq/exynos-cpufreq.c index fb24aaf4adcf..ae5b2bd3a978 100644 --- a/drivers/cpufreq/exynos-cpufreq.c +++ b/drivers/cpufreq/exynos-cpufreq.c @@ -10,6 +10,7 @@ */ #include <linux/kernel.h> +#include <linux/module.h> #include <linux/err.h> #include <linux/clk.h> #include <linux/io.h> diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index b0dac7d6ba31..9e231f52150c 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -659,4 +659,4 @@ static struct platform_driver s5pv210_cpufreq_platdrv = { }, .probe = s5pv210_cpufreq_probe, }; -module_platform_driver(s5pv210_cpufreq_platdrv); +builtin_platform_driver(s5pv210_cpufreq_platdrv); diff --git a/drivers/cpuidle/cpuidle-at91.c b/drivers/cpuidle/cpuidle-at91.c index f2446c78d87c..9c5853b6ca4a 100644 --- a/drivers/cpuidle/cpuidle-at91.c +++ b/drivers/cpuidle/cpuidle-at91.c @@ -62,5 +62,4 @@ static struct platform_driver at91_cpuidle_driver = { }, .probe = at91_cpuidle_probe, }; - -module_platform_driver(at91_cpuidle_driver); +builtin_platform_driver(at91_cpuidle_driver); diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c index 9445e6cc02be..c13feec89ea1 100644 --- a/drivers/cpuidle/cpuidle-calxeda.c +++ b/drivers/cpuidle/cpuidle-calxeda.c @@ -75,5 +75,4 @@ static struct platform_driver calxeda_cpuidle_plat_driver = { }, .probe = calxeda_cpuidle_probe, }; - -module_platform_driver(calxeda_cpuidle_plat_driver); +builtin_platform_driver(calxeda_cpuidle_plat_driver); diff --git a/drivers/cpuidle/cpuidle-zynq.c b/drivers/cpuidle/cpuidle-zynq.c index 543292b1d38e..6f4257fc56e5 100644 --- a/drivers/cpuidle/cpuidle-zynq.c +++ b/drivers/cpuidle/cpuidle-zynq.c @@ -73,5 +73,4 @@ static struct platform_driver zynq_cpuidle_driver = { }, .probe = zynq_cpuidle_probe, }; - -module_platform_driver(zynq_cpuidle_driver); +builtin_platform_driver(zynq_cpuidle_driver); diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 5bcd575fa96f..e6b658faef63 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -1034,8 +1034,8 @@ static int mv_cesa_get_sram(struct platform_device *pdev, &sram_size); cp->sram_size = sram_size; - cp->sram_pool = of_get_named_gen_pool(pdev->dev.of_node, - "marvell,crypto-srams", 0); + cp->sram_pool = of_gen_pool_get(pdev->dev.of_node, + "marvell,crypto-srams", 0); if (cp->sram_pool) { cp->sram = gen_pool_dma_alloc(cp->sram_pool, sram_size, &cp->sram_dma); diff --git a/drivers/edac/octeon_edac-l2c.c b/drivers/edac/octeon_edac-l2c.c index 7e98084d3645..afea7fc625cc 100644 --- a/drivers/edac/octeon_edac-l2c.c +++ b/drivers/edac/octeon_edac-l2c.c @@ -151,7 +151,7 @@ static int octeon_l2c_probe(struct platform_device *pdev) l2c->ctl_name = "octeon_l2c_err"; - if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + if (OCTEON_IS_OCTEON1PLUS()) { union cvmx_l2t_err l2t_err; union cvmx_l2d_err l2d_err; diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c index bb19e0732681..cda6dab5067a 100644 --- a/drivers/edac/octeon_edac-lmc.c +++ b/drivers/edac/octeon_edac-lmc.c @@ -234,7 +234,7 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) layers[0].size = 1; layers[0].is_virt_csrow = false; - if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + if (OCTEON_IS_OCTEON1PLUS()) { union cvmx_lmcx_mem_cfg0 cfg0; cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(0)); diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c index 0f83c33a7d1f..2ab6cf24c959 100644 --- a/drivers/edac/octeon_edac-pc.c +++ b/drivers/edac/octeon_edac-pc.c @@ -73,7 +73,7 @@ static int co_cache_error_event(struct notifier_block *this, edac_device_handle_ce(p->ed, cpu, 0, "dcache"); /* Clear the error indication */ - if (OCTEON_IS_MODEL(OCTEON_FAM_2)) + if (OCTEON_IS_OCTEON2()) write_octeon_c0_dcacheerr(1); else write_octeon_c0_dcacheerr(0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 22866d1c3d69..01657830b470 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -425,6 +425,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, unsigned irq_type); int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, struct amdgpu_fence **fence); +int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner, + uint64_t seq, struct amdgpu_fence **fence); void amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_next(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); @@ -435,9 +437,6 @@ int amdgpu_fence_wait(struct amdgpu_fence *fence, bool interruptible); int amdgpu_fence_wait_any(struct amdgpu_device *adev, struct amdgpu_fence **fences, bool intr); -long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev, - u64 *target_seq, bool intr, - long timeout); struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence); void amdgpu_fence_unref(struct amdgpu_fence **fence); @@ -1622,6 +1621,7 @@ struct amdgpu_vce { unsigned fb_version; atomic_t handles[AMDGPU_MAX_VCE_HANDLES]; struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES]; + uint32_t img_size[AMDGPU_MAX_VCE_HANDLES]; struct delayed_work idle_work; const struct firmware *fw; /* VCE firmware */ struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 36d34e0afbc3..f82a2dd83874 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -30,6 +30,7 @@ #include <drm/drmP.h> #include "amdgpu.h" +#include "amdgpu_trace.h" static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, struct amdgpu_bo_list **result, @@ -124,6 +125,8 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, gws_obj = entry->robj; if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA) oa_obj = entry->robj; + + trace_amdgpu_bo_list_set(list, entry->robj); } for (i = 0; i < list->num_entries; ++i) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f09b2cba40ca..d63135bf29c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -181,8 +181,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } p->chunks[i].chunk_id = user_chunk.chunk_id; p->chunks[i].length_dw = user_chunk.length_dw; - if (p->chunks[i].chunk_id == AMDGPU_CHUNK_ID_IB) - p->num_ibs++; size = p->chunks[i].length_dw; cdata = (void __user *)(unsigned long)user_chunk.chunk_data; @@ -199,7 +197,12 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto out; } - if (p->chunks[i].chunk_id == AMDGPU_CHUNK_ID_FENCE) { + switch (p->chunks[i].chunk_id) { + case AMDGPU_CHUNK_ID_IB: + p->num_ibs++; + break; + + case AMDGPU_CHUNK_ID_FENCE: size = sizeof(struct drm_amdgpu_cs_chunk_fence); if (p->chunks[i].length_dw * sizeof(uint32_t) >= size) { uint32_t handle; @@ -221,6 +224,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) r = -EINVAL; goto out; } + break; + + case AMDGPU_CHUNK_ID_DEPENDENCIES: + break; + + default: + r = -EINVAL; + goto out; } } @@ -445,8 +456,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo for (i = 0; i < parser->nchunks; i++) drm_free_large(parser->chunks[i].kdata); kfree(parser->chunks); - for (i = 0; i < parser->num_ibs; i++) - amdgpu_ib_free(parser->adev, &parser->ibs[i]); + if (parser->ibs) + for (i = 0; i < parser->num_ibs; i++) + amdgpu_ib_free(parser->adev, &parser->ibs[i]); kfree(parser->ibs); if (parser->uf.bo) drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); @@ -654,6 +666,55 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, return 0; } +static int amdgpu_cs_dependencies(struct amdgpu_device *adev, + struct amdgpu_cs_parser *p) +{ + struct amdgpu_ib *ib; + int i, j, r; + + if (!p->num_ibs) + return 0; + + /* Add dependencies to first IB */ + ib = &p->ibs[0]; + for (i = 0; i < p->nchunks; ++i) { + struct drm_amdgpu_cs_chunk_dep *deps; + struct amdgpu_cs_chunk *chunk; + unsigned num_deps; + + chunk = &p->chunks[i]; + + if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES) + continue; + + deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_dep); + + for (j = 0; j < num_deps; ++j) { + struct amdgpu_fence *fence; + struct amdgpu_ring *ring; + + r = amdgpu_cs_get_ring(adev, deps[j].ip_type, + deps[j].ip_instance, + deps[j].ring, &ring); + if (r) + return r; + + r = amdgpu_fence_recreate(ring, p->filp, + deps[j].handle, + &fence); + if (r) + return r; + + amdgpu_sync_fence(&ib->sync, fence); + amdgpu_fence_unref(&fence); + } + } + + return 0; +} + int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; @@ -688,11 +749,16 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) else DRM_ERROR("Failed to process the buffer list %d!\n", r); } - } else { + } + + if (!r) { reserved_buffers = true; r = amdgpu_cs_ib_fill(adev, &parser); } + if (!r) + r = amdgpu_cs_dependencies(adev, &parser); + if (r) { amdgpu_cs_parser_fini(&parser, r, reserved_buffers); up_read(&adev->exclusive_lock); @@ -730,9 +796,9 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, { union drm_amdgpu_wait_cs *wait = data; struct amdgpu_device *adev = dev->dev_private; - uint64_t seq[AMDGPU_MAX_RINGS] = {0}; - struct amdgpu_ring *ring = NULL; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); + struct amdgpu_fence *fence = NULL; + struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; long r; @@ -745,9 +811,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, if (r) return r; - seq[ring->idx] = wait->in.handle; + r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence); + if (r) + return r; - r = amdgpu_fence_wait_seq_timeout(adev, seq, true, timeout); + r = fence_wait_timeout(&fence->base, true, timeout); + amdgpu_fence_unref(&fence); amdgpu_ctx_put(ctx); if (r < 0) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fec487d1c870..ba46be361c9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1191,7 +1191,9 @@ static int amdgpu_early_init(struct amdgpu_device *adev) return -EINVAL; } - + adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL); + if (adev->ip_block_enabled == NULL) + return -ENOMEM; if (adev->ip_blocks == NULL) { DRM_ERROR("No IP blocks found!\n"); @@ -1575,8 +1577,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_fini(adev); - if (adev->ip_block_enabled) - kfree(adev->ip_block_enabled); + kfree(adev->ip_block_enabled); adev->ip_block_enabled = NULL; adev->accel_working = false; /* free i2c buses */ @@ -2000,4 +2001,10 @@ int amdgpu_debugfs_init(struct drm_minor *minor) void amdgpu_debugfs_cleanup(struct drm_minor *minor) { } +#else +static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) +{ + return 0; +} +static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 5c9918d01bf9..a7189a1fa6a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -136,6 +136,38 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, } /** + * amdgpu_fence_recreate - recreate a fence from an user fence + * + * @ring: ring the fence is associated with + * @owner: creator of the fence + * @seq: user fence sequence number + * @fence: resulting amdgpu fence object + * + * Recreates a fence command from the user fence sequence number (all asics). + * Returns 0 on success, -ENOMEM on failure. + */ +int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner, + uint64_t seq, struct amdgpu_fence **fence) +{ + struct amdgpu_device *adev = ring->adev; + + if (seq > ring->fence_drv.sync_seq[ring->idx]) + return -EINVAL; + + *fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); + if ((*fence) == NULL) + return -ENOMEM; + + (*fence)->seq = seq; + (*fence)->ring = ring; + (*fence)->owner = owner; + fence_init(&(*fence)->base, &amdgpu_fence_ops, + &adev->fence_queue.lock, adev->fence_context + ring->idx, + (*fence)->seq); + return 0; +} + +/** * amdgpu_fence_check_signaled - callback from fence_queue * * this function is called with fence_queue lock held, which is also used @@ -517,12 +549,14 @@ static bool amdgpu_fence_any_seq_signaled(struct amdgpu_device *adev, u64 *seq) * the wait timeout, or an error for all other cases. * -EDEADLK is returned when a GPU lockup has been detected. */ -long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev, u64 *target_seq, - bool intr, long timeout) +static long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev, + u64 *target_seq, bool intr, + long timeout) { uint64_t last_seq[AMDGPU_MAX_RINGS]; bool signaled; - int i, r; + int i; + long r; if (timeout == 0) { return amdgpu_fence_any_seq_signaled(adev, target_seq); @@ -1023,7 +1057,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) amdgpu_fence_process(ring); - seq_printf(m, "--- ring %d ---\n", i); + seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name); seq_printf(m, "Last signaled fence 0x%016llx\n", (unsigned long long)atomic64_read(&ring->fence_drv.last_seq)); seq_printf(m, "Last emitted 0x%016llx\n", @@ -1031,7 +1065,8 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) for (j = 0; j < AMDGPU_MAX_RINGS; ++j) { struct amdgpu_ring *other = adev->rings[j]; - if (i != j && other && other->fence_drv.initialized) + if (i != j && other && other->fence_drv.initialized && + ring->fence_drv.sync_seq[j]) seq_printf(m, "Last sync to ring %d 0x%016llx\n", j, ring->fence_drv.sync_seq[j]); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 0ec222295fee..975edb1000a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -496,7 +496,7 @@ error_unreserve: error_free: drm_free_large(vm_bos); - if (r) + if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); } @@ -525,8 +525,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - invalid_flags = ~(AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | - AMDGPU_VM_PAGE_EXECUTABLE); + invalid_flags = ~(AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE | + AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_PAGE_EXECUTABLE); if ((args->flags & invalid_flags)) { dev_err(&dev->pdev->dev, "invalid flags 0x%08X vs 0x%08X\n", args->flags, invalid_flags); @@ -579,7 +579,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, break; } - if (!r) + if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE)) amdgpu_gem_va_update_vm(adev, bo_va); drm_gem_object_unreference_unlocked(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index b56dd64bd4ea..961d7265c286 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -30,19 +30,21 @@ TRACE_EVENT(amdgpu_cs, TP_PROTO(struct amdgpu_cs_parser *p, int i), TP_ARGS(p, i), TP_STRUCT__entry( + __field(struct amdgpu_bo_list *, bo_list) __field(u32, ring) __field(u32, dw) __field(u32, fences) ), TP_fast_assign( + __entry->bo_list = p->bo_list; __entry->ring = p->ibs[i].ring->idx; __entry->dw = p->ibs[i].length_dw; __entry->fences = amdgpu_fence_count_emitted( p->ibs[i].ring); ), - TP_printk("ring=%u, dw=%u, fences=%u", - __entry->ring, __entry->dw, + TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u", + __entry->bo_list, __entry->ring, __entry->dw, __entry->fences) ); @@ -61,6 +63,54 @@ TRACE_EVENT(amdgpu_vm_grab_id, TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring) ); +TRACE_EVENT(amdgpu_vm_bo_map, + TP_PROTO(struct amdgpu_bo_va *bo_va, + struct amdgpu_bo_va_mapping *mapping), + TP_ARGS(bo_va, mapping), + TP_STRUCT__entry( + __field(struct amdgpu_bo *, bo) + __field(long, start) + __field(long, last) + __field(u64, offset) + __field(u32, flags) + ), + + TP_fast_assign( + __entry->bo = bo_va->bo; + __entry->start = mapping->it.start; + __entry->last = mapping->it.last; + __entry->offset = mapping->offset; + __entry->flags = mapping->flags; + ), + TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x", + __entry->bo, __entry->start, __entry->last, + __entry->offset, __entry->flags) +); + +TRACE_EVENT(amdgpu_vm_bo_unmap, + TP_PROTO(struct amdgpu_bo_va *bo_va, + struct amdgpu_bo_va_mapping *mapping), + TP_ARGS(bo_va, mapping), + TP_STRUCT__entry( + __field(struct amdgpu_bo *, bo) + __field(long, start) + __field(long, last) + __field(u64, offset) + __field(u32, flags) + ), + + TP_fast_assign( + __entry->bo = bo_va->bo; + __entry->start = mapping->it.start; + __entry->last = mapping->it.last; + __entry->offset = mapping->offset; + __entry->flags = mapping->flags; + ), + TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x", + __entry->bo, __entry->start, __entry->last, + __entry->offset, __entry->flags) +); + TRACE_EVENT(amdgpu_vm_bo_update, TP_PROTO(struct amdgpu_bo_va_mapping *mapping), TP_ARGS(mapping), @@ -121,6 +171,21 @@ TRACE_EVENT(amdgpu_vm_flush, __entry->pd_addr, __entry->ring, __entry->id) ); +TRACE_EVENT(amdgpu_bo_list_set, + TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo), + TP_ARGS(list, bo), + TP_STRUCT__entry( + __field(struct amdgpu_bo_list *, list) + __field(struct amdgpu_bo *, bo) + ), + + TP_fast_assign( + __entry->list = list; + __entry->bo = bo; + ), + TP_printk("list=%p, bo=%p", __entry->list, __entry->bo) +); + DECLARE_EVENT_CLASS(amdgpu_fence_request, TP_PROTO(struct drm_device *dev, int ring, u32 seqno), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d3706a498293..dd3415d2e45d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -674,7 +674,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) return 0; if (gtt && gtt->userptr) { - ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL); + ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 1127a504f118..d3ca73090e39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -464,28 +464,42 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, * @p: parser context * @lo: address of lower dword * @hi: address of higher dword + * @size: minimum size * * Patch relocation inside command stream with real buffer address */ -int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int hi) +static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, + int lo, int hi, unsigned size, uint32_t index) { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_ib *ib = &p->ibs[ib_idx]; struct amdgpu_bo *bo; uint64_t addr; + if (index == 0xffffffff) + index = 0; + addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) | ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; + addr += ((uint64_t)size) * ((uint64_t)index); mapping = amdgpu_cs_find_mapping(p, addr, &bo); if (mapping == NULL) { - DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d\n", + DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", + addr, lo, hi, size, index); + return -EINVAL; + } + + if ((addr + (uint64_t)size) > + ((uint64_t)mapping->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) { + DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n", addr, lo, hi); return -EINVAL; } addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; addr += amdgpu_bo_gpu_offset(bo); + addr -= ((uint64_t)size) * ((uint64_t)index); ib->ptr[lo] = addr & 0xFFFFFFFF; ib->ptr[hi] = addr >> 32; @@ -494,6 +508,48 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int } /** + * amdgpu_vce_validate_handle - validate stream handle + * + * @p: parser context + * @handle: handle to validate + * @allocated: allocated a new handle? + * + * Validates the handle and return the found session index or -EINVAL + * we we don't have another free session index. + */ +static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, + uint32_t handle, bool *allocated) +{ + unsigned i; + + *allocated = false; + + /* validate the handle */ + for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { + if (atomic_read(&p->adev->vce.handles[i]) == handle) { + if (p->adev->vce.filp[i] != p->filp) { + DRM_ERROR("VCE handle collision detected!\n"); + return -EINVAL; + } + return i; + } + } + + /* handle not found try to alloc a new one */ + for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { + if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) { + p->adev->vce.filp[i] = p->filp; + p->adev->vce.img_size[i] = 0; + *allocated = true; + return i; + } + } + + DRM_ERROR("No more free VCE handles!\n"); + return -EINVAL; +} + +/** * amdgpu_vce_cs_parse - parse and validate the command stream * * @p: parser context @@ -501,10 +557,15 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int */ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) { - uint32_t handle = 0; - bool destroy = false; - int i, r, idx = 0; struct amdgpu_ib *ib = &p->ibs[ib_idx]; + unsigned fb_idx = 0, bs_idx = 0; + int session_idx = -1; + bool destroyed = false; + bool created = false; + bool allocated = false; + uint32_t tmp, handle = 0; + uint32_t *size = &tmp; + int i, r = 0, idx = 0; amdgpu_vce_note_usage(p->adev); @@ -514,16 +575,44 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) if ((len < 8) || (len & 3)) { DRM_ERROR("invalid VCE command length (%d)!\n", len); - return -EINVAL; + r = -EINVAL; + goto out; + } + + if (destroyed) { + DRM_ERROR("No other command allowed after destroy!\n"); + r = -EINVAL; + goto out; } switch (cmd) { case 0x00000001: // session handle = amdgpu_get_ib_value(p, ib_idx, idx + 2); + session_idx = amdgpu_vce_validate_handle(p, handle, + &allocated); + if (session_idx < 0) + return session_idx; + size = &p->adev->vce.img_size[session_idx]; break; case 0x00000002: // task info + fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6); + bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7); + break; + case 0x01000001: // create + created = true; + if (!allocated) { + DRM_ERROR("Handle already in use!\n"); + r = -EINVAL; + goto out; + } + + *size = amdgpu_get_ib_value(p, ib_idx, idx + 8) * + amdgpu_get_ib_value(p, ib_idx, idx + 10) * + 8 * 3 / 2; + break; + case 0x04000001: // config extension case 0x04000002: // pic control case 0x04000005: // rate control @@ -534,60 +623,74 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) break; case 0x03000001: // encode - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9); + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9, + *size, 0); if (r) - return r; + goto out; - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11); + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11, + *size / 3, 0); if (r) - return r; + goto out; break; case 0x02000001: // destroy - destroy = true; + destroyed = true; break; case 0x05000001: // context buffer + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + *size * 2, 0); + if (r) + goto out; + break; + case 0x05000004: // video bitstream buffer + tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4); + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + tmp, bs_idx); + if (r) + goto out; + break; + case 0x05000005: // feedback buffer - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2); + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + 4096, fb_idx); if (r) - return r; + goto out; break; default: DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); - return -EINVAL; + r = -EINVAL; + goto out; } - idx += len / 4; - } - - if (destroy) { - /* IB contains a destroy msg, free the handle */ - for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) - atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0); + if (session_idx == -1) { + DRM_ERROR("no session command at start of IB\n"); + r = -EINVAL; + goto out; + } - return 0; + idx += len / 4; } - /* create or encode, validate the handle */ - for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { - if (atomic_read(&p->adev->vce.handles[i]) == handle) - return 0; + if (allocated && !created) { + DRM_ERROR("New session without create command!\n"); + r = -ENOENT; } - /* handle not found try to alloc a new one */ - for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { - if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) { - p->adev->vce.filp[i] = p->filp; - return 0; - } +out: + if ((!r && destroyed) || (r && allocated)) { + /* + * IB contains a destroy msg or we have allocated an + * handle and got an error, anyway free the handle + */ + for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) + atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0); } - DRM_ERROR("No more free VCE handles!\n"); - - return -EINVAL; + return r; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index b6a9d0956c60..7ccdb5927da5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -33,7 +33,6 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, struct amdgpu_fence **fence); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); -int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int hi); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring, struct amdgpu_semaphore *semaphore, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 407882b233c7..9a4e3b63f1cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1001,6 +1001,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, list_add(&mapping->list, &bo_va->mappings); interval_tree_insert(&mapping->it, &vm->va); + trace_amdgpu_vm_bo_map(bo_va, mapping); bo_va->addr = 0; @@ -1058,6 +1059,7 @@ error_free: mutex_lock(&vm->mutex); list_del(&mapping->list); interval_tree_remove(&mapping->it, &vm->va); + trace_amdgpu_vm_bo_unmap(bo_va, mapping); kfree(mapping); error_unlock: @@ -1099,6 +1101,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, mutex_lock(&vm->mutex); list_del(&mapping->list); interval_tree_remove(&mapping->it, &vm->va); + trace_amdgpu_vm_bo_unmap(bo_va, mapping); if (bo_va->addr) { /* clear the old address */ @@ -1139,6 +1142,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, list_for_each_entry_safe(mapping, next, &bo_va->mappings, list) { list_del(&mapping->list); interval_tree_remove(&mapping->it, &vm->va); + trace_amdgpu_vm_bo_unmap(bo_va, mapping); if (bo_va->addr) list_add(&mapping->list, &vm->freed); else diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 5dab578d6462..341c56681841 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -2256,10 +2256,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) return -EINVAL; } - adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL); - if (adev->ip_block_enabled == NULL) - return -ENOMEM; - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 220865a44814..d19085a97064 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -552,4 +552,10 @@ #define VCE_CMD_IB_AUTO 0x00000005 #define VCE_CMD_SEMAPHORE 0x00000006 +/* valid for both DEFAULT_MTYPE and APE1_MTYPE */ +enum { + MTYPE_CACHED = 0, + MTYPE_NONCACHED = 3 +}; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c index e4936a452bc6..f75a31df30bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c @@ -425,7 +425,7 @@ static int cz_dpm_init(struct amdgpu_device *adev) pi->mgcg_cgtt_local1 = 0x0; pi->clock_slow_down_step = 25000; pi->skip_clock_slow_down = 1; - pi->enable_nb_ps_policy = 1; + pi->enable_nb_ps_policy = 0; pi->caps_power_containment = true; pi->caps_cac = true; pi->didt_enabled = false; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.h b/drivers/gpu/drm/amd/amdgpu/cz_dpm.h index 782a74107664..99e1afc89629 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.h @@ -46,7 +46,7 @@ /* Do not change the following, it is also defined in SMU8.h */ #define SMU_EnabledFeatureScoreboard_AcpDpmOn 0x00000001 -#define SMU_EnabledFeatureScoreboard_SclkDpmOn 0x00100000 +#define SMU_EnabledFeatureScoreboard_SclkDpmOn 0x00200000 #define SMU_EnabledFeatureScoreboard_UvdDpmOn 0x00800000 #define SMU_EnabledFeatureScoreboard_VceDpmOn 0x01000000 diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 72c27ac915f2..aaca8d663f2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -3379,7 +3379,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev, uint32_t disp_int, mask, int_control, tmp; unsigned hpd; - if (entry->src_data > 6) { + if (entry->src_data >= adev->mode_info.num_hpd) { DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index cb7907447b81..2c188fb9fd22 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2010,6 +2010,46 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev, } /** + * gmc_v7_0_init_compute_vmid - gart enable + * + * @rdev: amdgpu_device pointer + * + * Initialize compute vmid sh_mem registers + * + */ +#define DEFAULT_SH_MEM_BASES (0x6000) +#define FIRST_COMPUTE_VMID (8) +#define LAST_COMPUTE_VMID (16) +static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev) +{ + int i; + uint32_t sh_mem_config; + uint32_t sh_mem_bases; + + /* + * Configure apertures: + * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) + * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) + * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) + */ + sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); + sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT; + mutex_lock(&adev->srbm_mutex); + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + cik_srbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32(mmSH_MEM_CONFIG, sh_mem_config); + WREG32(mmSH_MEM_APE1_BASE, 1); + WREG32(mmSH_MEM_APE1_LIMIT, 0); + WREG32(mmSH_MEM_BASES, sh_mem_bases); + } + cik_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +/** * gfx_v7_0_gpu_init - setup the 3D engine * * @adev: amdgpu_device pointer @@ -2230,6 +2270,8 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); + gmc_v7_0_init_compute_vmid(adev); + WREG32(mmSX_DEBUG_1, 0x20); WREG32(mmTA_CNTL_AUX, 0x00010000); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 14242bd33363..7b683fb2173c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1894,6 +1894,51 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev, mutex_unlock(&adev->grbm_idx_mutex); } +/** + * gmc_v8_0_init_compute_vmid - gart enable + * + * @rdev: amdgpu_device pointer + * + * Initialize compute vmid sh_mem registers + * + */ +#define DEFAULT_SH_MEM_BASES (0x6000) +#define FIRST_COMPUTE_VMID (8) +#define LAST_COMPUTE_VMID (16) +static void gmc_v8_0_init_compute_vmid(struct amdgpu_device *adev) +{ + int i; + uint32_t sh_mem_config; + uint32_t sh_mem_bases; + + /* + * Configure apertures: + * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) + * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) + * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) + */ + sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); + + sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 << + SH_MEM_CONFIG__ADDRESS_MODE__SHIFT | + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | + MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | + SH_MEM_CONFIG__PRIVATE_ATC_MASK; + + mutex_lock(&adev->srbm_mutex); + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + vi_srbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32(mmSH_MEM_CONFIG, sh_mem_config); + WREG32(mmSH_MEM_APE1_BASE, 1); + WREG32(mmSH_MEM_APE1_LIMIT, 0); + WREG32(mmSH_MEM_BASES, sh_mem_bases); + } + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) { u32 gb_addr_config; @@ -2113,6 +2158,8 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); + gmc_v8_0_init_compute_vmid(adev); + mutex_lock(&adev->grbm_idx_mutex); /* * making sure that the following register writes will be broadcasted @@ -3081,7 +3128,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, - AMDGPU_DOORBELL_MEC_RING7 << 2); + 0x7FFFF << 2); } tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, @@ -3097,6 +3144,12 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_hqd_pq_wptr = ring->wptr; + WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); + mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + /* set the vmid for the queue */ mqd->cp_hqd_vmid = 0; WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index e3c1fde75363..7bb37b93993f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -439,6 +439,31 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev) } /** + * sdma_v3_0_ctx_switch_enable - stop the async dma engines context switch + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs context switch. + * + * Halt or unhalt the async dma engines context switch (VI). + */ +static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) +{ + u32 f32_cntl; + int i; + + for (i = 0; i < SDMA_MAX_INSTANCE; i++) { + f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]); + if (enable) + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, 1); + else + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, 0); + WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl); + } +} + +/** * sdma_v3_0_enable - stop the async dma engines * * @adev: amdgpu_device pointer @@ -648,6 +673,8 @@ static int sdma_v3_0_start(struct amdgpu_device *adev) /* unhalt the MEs */ sdma_v3_0_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v3_0_ctx_switch_enable(adev, true); /* start the gfx rings and rlc compute queues */ r = sdma_v3_0_gfx_resume(adev); @@ -1079,6 +1106,7 @@ static int sdma_v3_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + sdma_v3_0_ctx_switch_enable(adev, false); sdma_v3_0_enable(adev, false); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 90fc93c2c1d0..fa5a4448531d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1189,10 +1189,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) return -EINVAL; } - adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL); - if (adev->ip_block_enabled == NULL) - return -ENOMEM; - return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 619dad1b2386..9daa2883ac18 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -516,17 +516,17 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, struct page *page_table; if (WARN_ON(!ppgtt->pdp.page_directory[pdpe])) - continue; + break; pd = ppgtt->pdp.page_directory[pdpe]; if (WARN_ON(!pd->page_table[pde])) - continue; + break; pt = pd->page_table[pde]; if (WARN_ON(!pt->page)) - continue; + break; page_table = pt->page; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f5edb3504167..2030f602cbf8 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3491,6 +3491,7 @@ enum skl_disp_power_wells { #define BLM_POLARITY_PNV (1 << 0) /* pnv only */ #define BLC_HIST_CTL (dev_priv->info.display_mmio_offset + 0x61260) +#define BLM_HISTOGRAM_ENABLE (1 << 31) /* New registers for PCH-split platforms. Safe where new bits show up, the * register layout machtes with gen4 BLC_PWM_CTL[12]. */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index dcb1d25d6f05..1b61f9810387 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13303,6 +13303,16 @@ intel_check_primary_plane(struct drm_plane *plane, intel_crtc->atomic.wait_vblank = true; } + /* + * FIXME: Actually if we will still have any other plane enabled + * on the pipe we could let IPS enabled still, but for + * now lets consider that when we make primary invisible + * by setting DSPCNTR to 0 on update_primary_plane function + * IPS needs to be disable. + */ + if (!state->visible || !fb) + intel_crtc->atomic.disable_ips = true; + intel_crtc->atomic.fb_bits |= INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe); @@ -13400,6 +13410,9 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc) if (intel_crtc->atomic.disable_fbc) intel_fbc_disable(dev); + if (intel_crtc->atomic.disable_ips) + hsw_disable_ips(intel_crtc); + if (intel_crtc->atomic.pre_disable_primary) intel_pre_disable_primary(crtc); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 76afc62373d7..6e8faa253792 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1140,6 +1140,9 @@ skl_edp_set_pll_config(struct intel_crtc_state *pipe_config, int link_clock) static void hsw_dp_set_ddi_pll_sel(struct intel_crtc_state *pipe_config, int link_bw) { + memset(&pipe_config->dpll_hw_state, 0, + sizeof(pipe_config->dpll_hw_state)); + switch (link_bw) { case DP_LINK_BW_1_62: pipe_config->ddi_pll_sel = PORT_CLK_SEL_LCPLL_810; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 2afb31a46275..105928382e21 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -485,6 +485,7 @@ struct intel_crtc_atomic_commit { /* Sleepable operations to perform before commit */ bool wait_for_flips; bool disable_fbc; + bool disable_ips; bool pre_disable_primary; bool update_wm; unsigned disabled_planes; diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 7d83527f95f7..55aad2322e10 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -907,6 +907,14 @@ static void i9xx_enable_backlight(struct intel_connector *connector) /* XXX: combine this into above write? */ intel_panel_actually_set_backlight(connector, panel->backlight.level); + + /* + * Needed to enable backlight on some 855gm models. BLC_HIST_CTL is + * 855gm only, but checking for gen2 is safe, as 855gm is the only gen2 + * that has backlight. + */ + if (IS_GEN2(dev)) + I915_WRITE(BLC_HIST_CTL, BLM_HISTOGRAM_ENABLE); } static void i965_enable_backlight(struct intel_connector *connector) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index b0688b0c8908..4ecf5caa8c6d 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4604,6 +4604,31 @@ void cik_compute_set_wptr(struct radeon_device *rdev, WDOORBELL32(ring->doorbell_index, ring->wptr); } +static void cik_compute_stop(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + u32 j, tmp; + + cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); + /* Disable wptr polling. */ + tmp = RREG32(CP_PQ_WPTR_POLL_CNTL); + tmp &= ~WPTR_POLL_EN; + WREG32(CP_PQ_WPTR_POLL_CNTL, tmp); + /* Disable HQD. */ + if (RREG32(CP_HQD_ACTIVE) & 1) { + WREG32(CP_HQD_DEQUEUE_REQUEST, 1); + for (j = 0; j < rdev->usec_timeout; j++) { + if (!(RREG32(CP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + WREG32(CP_HQD_DEQUEUE_REQUEST, 0); + WREG32(CP_HQD_PQ_RPTR, 0); + WREG32(CP_HQD_PQ_WPTR, 0); + } + cik_srbm_select(rdev, 0, 0, 0, 0); +} + /** * cik_cp_compute_enable - enable/disable the compute CP MEs * @@ -4617,6 +4642,15 @@ static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) if (enable) WREG32(CP_MEC_CNTL, 0); else { + /* + * To make hibernation reliable we need to clear compute ring + * configuration before halting the compute ring. + */ + mutex_lock(&rdev->srbm_mutex); + cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]); + cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); + mutex_unlock(&rdev->srbm_mutex); + WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index f86eb54e7763..d16f2eebd95e 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -268,6 +268,17 @@ static void cik_sdma_gfx_stop(struct radeon_device *rdev) } rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; + + /* FIXME use something else than big hammer but after few days can not + * seem to find good combination so reset SDMA blocks as it seems we + * do not shut them down properly. This fix hibernation and does not + * affect suspend to ram. + */ + WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1); + (void)RREG32(SRBM_SOFT_RESET); + udelay(50); + WREG32(SRBM_SOFT_RESET, 0); + (void)RREG32(SRBM_SOFT_RESET); } /** diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index c89215275053..fa719c53449b 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -469,22 +469,22 @@ void radeon_audio_detect(struct drm_connector *connector, dig = radeon_encoder->enc_priv; if (status == connector_status_connected) { - struct radeon_connector *radeon_connector; - int sink_type; - if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) { radeon_encoder->audio = NULL; return; } - radeon_connector = to_radeon_connector(connector); - sink_type = radeon_dp_getsinktype(radeon_connector); + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { + struct radeon_connector *radeon_connector = to_radeon_connector(connector); - if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && - sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) - radeon_encoder->audio = rdev->audio.dp_funcs; - else + if (radeon_dp_getsinktype(radeon_connector) == + CONNECTOR_OBJECT_ID_DISPLAYPORT) + radeon_encoder->audio = rdev->audio.dp_funcs; + else + radeon_encoder->audio = rdev->audio.hdmi_funcs; + } else { radeon_encoder->audio = rdev->audio.hdmi_funcs; + } dig->afmt->pin = radeon_audio_get_pin(connector->encoder); radeon_audio_enable(rdev, dig->afmt->pin, 0xf); diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index aeb676708e60..634793ea8418 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -257,7 +257,6 @@ static int radeonfb_create(struct drm_fb_helper *helper, } info->par = rfbdev; - info->skip_vt_switch = true; ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj); if (ret) { diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index edafd3c2b170..06ac59fe332a 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -719,7 +719,7 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm) return 0; if (gtt && gtt->userptr) { - ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL); + ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) return -ENOMEM; diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 3662157c2b15..ec10533a49b8 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -1129,12 +1129,12 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev, interval_tree_remove(&bo_va->it, &vm->va); spin_lock(&vm->status_lock); - if (list_empty(&bo_va->vm_status)) { + list_del(&bo_va->vm_status); + if (bo_va->it.start || bo_va->it.last) { bo_va->bo = radeon_bo_ref(bo_va->bo); list_add(&bo_va->vm_status, &vm->freed); } else { radeon_fence_unref(&bo_va->last_pt_update); - list_del(&bo_va->vm_status); kfree(bo_va); } spin_unlock(&vm->status_lock); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 3962176ee713..01b558fe3695 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -21,6 +21,7 @@ #include <drm/drm_fb_helper.h> #include <linux/dma-mapping.h> #include <linux/pm_runtime.h> +#include <linux/module.h> #include <linux/of_graph.h> #include <linux/component.h> diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 4557f335a8a5..dc65161d7cad 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -19,6 +19,7 @@ #include <drm/drm_plane_helper.h> #include <linux/kernel.h> +#include <linux/module.h> #include <linux/platform_device.h> #include <linux/clk.h> #include <linux/of.h> diff --git a/drivers/hsi/controllers/omap_ssi.h b/drivers/hsi/controllers/omap_ssi.h index 9d056417d88c..f9aaf37262be 100644 --- a/drivers/hsi/controllers/omap_ssi.h +++ b/drivers/hsi/controllers/omap_ssi.h @@ -24,6 +24,7 @@ #define __LINUX_HSI_OMAP_SSI_H__ #include <linux/device.h> +#include <linux/module.h> #include <linux/platform_device.h> #include <linux/hsi/hsi.h> #include <linux/gpio.h> diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index 2a808822af21..37c16afe007a 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -777,7 +777,7 @@ static int __init i8k_init_hwmon(void) if (err >= 0) i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN2; - i8k_hwmon_dev = hwmon_device_register_with_groups(NULL, "dell-smm", + i8k_hwmon_dev = hwmon_device_register_with_groups(NULL, "dell_smm", NULL, i8k_groups); if (IS_ERR(i8k_hwmon_dev)) { err = PTR_ERR(i8k_hwmon_dev); diff --git a/drivers/hwmon/mcp3021.c b/drivers/hwmon/mcp3021.c index d219c06a857b..972444a14cca 100644 --- a/drivers/hwmon/mcp3021.c +++ b/drivers/hwmon/mcp3021.c @@ -31,14 +31,11 @@ /* output format */ #define MCP3021_SAR_SHIFT 2 #define MCP3021_SAR_MASK 0x3ff - #define MCP3021_OUTPUT_RES 10 /* 10-bit resolution */ -#define MCP3021_OUTPUT_SCALE 4 #define MCP3221_SAR_SHIFT 0 #define MCP3221_SAR_MASK 0xfff #define MCP3221_OUTPUT_RES 12 /* 12-bit resolution */ -#define MCP3221_OUTPUT_SCALE 1 enum chips { mcp3021, @@ -54,7 +51,6 @@ struct mcp3021_data { u16 sar_shift; u16 sar_mask; u8 output_res; - u8 output_scale; }; static int mcp3021_read16(struct i2c_client *client) @@ -84,13 +80,7 @@ static int mcp3021_read16(struct i2c_client *client) static inline u16 volts_from_reg(struct mcp3021_data *data, u16 val) { - if (val == 0) - return 0; - - val = val * data->output_scale - data->output_scale / 2; - - return val * DIV_ROUND_CLOSEST(data->vdd, - (1 << data->output_res) * data->output_scale); + return DIV_ROUND_CLOSEST(data->vdd * val, 1 << data->output_res); } static ssize_t show_in_input(struct device *dev, struct device_attribute *attr, @@ -132,14 +122,12 @@ static int mcp3021_probe(struct i2c_client *client, data->sar_shift = MCP3021_SAR_SHIFT; data->sar_mask = MCP3021_SAR_MASK; data->output_res = MCP3021_OUTPUT_RES; - data->output_scale = MCP3021_OUTPUT_SCALE; break; case mcp3221: data->sar_shift = MCP3221_SAR_SHIFT; data->sar_mask = MCP3221_SAR_MASK; data->output_res = MCP3221_OUTPUT_RES; - data->output_scale = MCP3221_OUTPUT_SCALE; break; } diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 55765790907b..28fcb2e246d5 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -547,7 +547,7 @@ static umode_t nct7802_temp_is_visible(struct kobject *kobj, if (index >= 9 && index < 18 && (reg & 0x0c) != 0x04 && (reg & 0x0c) != 0x08) /* RD2 */ return 0; - if (index >= 18 && index < 27 && (reg & 0x30) != 0x10) /* RD3 */ + if (index >= 18 && index < 27 && (reg & 0x30) != 0x20) /* RD3 */ return 0; if (index >= 27 && index < 35) /* local */ return attr->mode; diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c index f3755e0aa935..f80acb36ff07 100644 --- a/drivers/mailbox/pl320-ipc.c +++ b/drivers/mailbox/pl320-ipc.c @@ -195,4 +195,4 @@ static int __init ipc_init(void) { return amba_driver_register(&pl320_driver); } -module_init(ipc_init); +subsys_initcall(ipc_init); diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 9df2b6801f76..b2b411da297b 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -43,6 +43,7 @@ #include <linux/regulator/consumer.h> #include <linux/pinctrl/consumer.h> #include <linux/pm_runtime.h> +#include <linux/pm_wakeirq.h> #include <linux/platform_data/hsmmc-omap.h> /* OMAP HSMMC Host Controller Registers */ @@ -218,7 +219,6 @@ struct omap_hsmmc_host { unsigned int flags; #define AUTO_CMD23 (1 << 0) /* Auto CMD23 support */ #define HSMMC_SDIO_IRQ_ENABLED (1 << 1) /* SDIO irq enabled */ -#define HSMMC_WAKE_IRQ_ENABLED (1 << 2) struct omap_hsmmc_next next_data; struct omap_hsmmc_platform_data *pdata; @@ -1117,22 +1117,6 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static irqreturn_t omap_hsmmc_wake_irq(int irq, void *dev_id) -{ - struct omap_hsmmc_host *host = dev_id; - - /* cirq is level triggered, disable to avoid infinite loop */ - spin_lock(&host->irq_lock); - if (host->flags & HSMMC_WAKE_IRQ_ENABLED) { - disable_irq_nosync(host->wake_irq); - host->flags &= ~HSMMC_WAKE_IRQ_ENABLED; - } - spin_unlock(&host->irq_lock); - pm_request_resume(host->dev); /* no use counter */ - - return IRQ_HANDLED; -} - static void set_sd_bus_power(struct omap_hsmmc_host *host) { unsigned long i; @@ -1665,7 +1649,6 @@ static void omap_hsmmc_enable_sdio_irq(struct mmc_host *mmc, int enable) static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host) { - struct mmc_host *mmc = host->mmc; int ret; /* @@ -1677,11 +1660,7 @@ static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host) if (!host->dev->of_node || !host->wake_irq) return -ENODEV; - /* Prevent auto-enabling of IRQ */ - irq_set_status_flags(host->wake_irq, IRQ_NOAUTOEN); - ret = devm_request_irq(host->dev, host->wake_irq, omap_hsmmc_wake_irq, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - mmc_hostname(mmc), host); + ret = dev_pm_set_dedicated_wake_irq(host->dev, host->wake_irq); if (ret) { dev_err(mmc_dev(host->mmc), "Unable to request wake IRQ\n"); goto err; @@ -1718,7 +1697,7 @@ static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host) return 0; err_free_irq: - devm_free_irq(host->dev, host->wake_irq, host); + dev_pm_clear_wake_irq(host->dev); err: dev_warn(host->dev, "no SDIO IRQ support, falling back to polling\n"); host->wake_irq = 0; @@ -2007,6 +1986,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev) omap_hsmmc_ops.multi_io_quirk = omap_hsmmc_multi_io_quirk; } + device_init_wakeup(&pdev->dev, true); pm_runtime_enable(host->dev); pm_runtime_get_sync(host->dev); pm_runtime_set_autosuspend_delay(host->dev, MMC_AUTOSUSPEND_DELAY); @@ -2147,6 +2127,7 @@ err_slot_name: if (host->use_reg) omap_hsmmc_reg_put(host); err_irq: + device_init_wakeup(&pdev->dev, false); if (host->tx_chan) dma_release_channel(host->tx_chan); if (host->rx_chan) @@ -2178,6 +2159,7 @@ static int omap_hsmmc_remove(struct platform_device *pdev) pm_runtime_put_sync(host->dev); pm_runtime_disable(host->dev); + device_init_wakeup(&pdev->dev, false); if (host->dbclk) clk_disable_unprepare(host->dbclk); @@ -2204,11 +2186,6 @@ static int omap_hsmmc_suspend(struct device *dev) OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP); } - /* do not wake up due to sdio irq */ - if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && - !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ)) - disable_irq(host->wake_irq); - if (host->dbclk) clk_disable_unprepare(host->dbclk); @@ -2233,11 +2210,6 @@ static int omap_hsmmc_resume(struct device *dev) omap_hsmmc_conf_bus_power(host); omap_hsmmc_protect_card(host); - - if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && - !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ)) - enable_irq(host->wake_irq); - pm_runtime_mark_last_busy(host->dev); pm_runtime_put_autosuspend(host->dev); return 0; @@ -2277,10 +2249,6 @@ static int omap_hsmmc_runtime_suspend(struct device *dev) } pinctrl_pm_select_idle_state(dev); - - WARN_ON(host->flags & HSMMC_WAKE_IRQ_ENABLED); - enable_irq(host->wake_irq); - host->flags |= HSMMC_WAKE_IRQ_ENABLED; } else { pinctrl_pm_select_idle_state(dev); } @@ -2302,11 +2270,6 @@ static int omap_hsmmc_runtime_resume(struct device *dev) spin_lock_irqsave(&host->irq_lock, flags); if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && (host->flags & HSMMC_SDIO_IRQ_ENABLED)) { - /* sdio irq flag can't change while in runtime suspend */ - if (host->flags & HSMMC_WAKE_IRQ_ENABLED) { - disable_irq_nosync(host->wake_irq); - host->flags &= ~HSMMC_WAKE_IRQ_ENABLED; - } pinctrl_pm_select_default_state(host->dev); diff --git a/drivers/pcmcia/xxs1500_ss.c b/drivers/pcmcia/xxs1500_ss.c index 4c04360f378b..b2a189507fc3 100644 --- a/drivers/pcmcia/xxs1500_ss.c +++ b/drivers/pcmcia/xxs1500_ss.c @@ -11,6 +11,7 @@ #include <linux/io.h> #include <linux/ioport.h> #include <linux/mm.h> +#include <linux/module.h> #include <linux/platform_device.h> #include <linux/pm.h> #include <linux/resource.h> diff --git a/drivers/platform/goldfish/pdev_bus.c b/drivers/platform/goldfish/pdev_bus.c index 8c43589c3edb..1f52462f4cdd 100644 --- a/drivers/platform/goldfish/pdev_bus.c +++ b/drivers/platform/goldfish/pdev_bus.c @@ -220,20 +220,10 @@ free_resources: return ret; } -static int goldfish_pdev_bus_remove(struct platform_device *pdev) -{ - iounmap(pdev_bus_base); - free_irq(pdev_bus_irq, pdev); - release_mem_region(pdev_bus_addr, pdev_bus_len); - return 0; -} - static struct platform_driver goldfish_pdev_bus_driver = { .probe = goldfish_pdev_bus_probe, - .remove = goldfish_pdev_bus_remove, .driver = { .name = "goldfish_pdev_bus" } }; - -module_platform_driver(goldfish_pdev_bus_driver); +builtin_platform_driver(goldfish_pdev_bus_driver); diff --git a/drivers/power/reset/syscon-reboot.c b/drivers/power/reset/syscon-reboot.c index d3c7d245ae63..7d0d269a0837 100644 --- a/drivers/power/reset/syscon-reboot.c +++ b/drivers/power/reset/syscon-reboot.c @@ -88,4 +88,4 @@ static struct platform_driver syscon_reboot_driver = { .of_match_table = syscon_reboot_of_match, }, }; -module_platform_driver(syscon_reboot_driver); +builtin_platform_driver(syscon_reboot_driver); diff --git a/drivers/regulator/max77802.c b/drivers/regulator/max77802.c index 6af41abccacb..c07ee13bd470 100644 --- a/drivers/regulator/max77802.c +++ b/drivers/regulator/max77802.c @@ -27,6 +27,7 @@ #include <linux/gpio.h> #include <linux/slab.h> #include <linux/gpio/consumer.h> +#include <linux/module.h> #include <linux/platform_device.h> #include <linux/regulator/driver.h> #include <linux/regulator/machine.h> diff --git a/drivers/soc/qcom/spm.c b/drivers/soc/qcom/spm.c index b562af816c0a..b04b05a0904e 100644 --- a/drivers/soc/qcom/spm.c +++ b/drivers/soc/qcom/spm.c @@ -260,7 +260,7 @@ static int __init qcom_cpuidle_init(struct device_node *cpu_node, int cpu) /* We have atleast one power down mode */ cpumask_clear(&mask); cpumask_set_cpu(cpu, &mask); - qcom_scm_set_warm_boot_addr(cpu_resume, &mask); + qcom_scm_set_warm_boot_addr(cpu_resume_arm, &mask); } per_cpu(qcom_idle_ops, cpu) = fns; diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index cc119d15dd16..75d0457a77b7 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -1021,7 +1021,7 @@ static struct platform_driver tegra_pmc_driver = { }, .probe = tegra_pmc_probe, }; -module_platform_driver(tegra_pmc_driver); +builtin_platform_driver(tegra_pmc_driver); /* * Early initialization to allow access to registers in the very early boot diff --git a/drivers/soc/versatile/soc-realview.c b/drivers/soc/versatile/soc-realview.c index 1a07bf540fec..e642c4540dda 100644 --- a/drivers/soc/versatile/soc-realview.c +++ b/drivers/soc/versatile/soc-realview.c @@ -142,4 +142,4 @@ static struct platform_driver realview_soc_driver = { .of_match_table = realview_soc_of_match, }, }; -module_platform_driver(realview_soc_driver); +builtin_platform_driver(realview_soc_driver); diff --git a/drivers/tty/metag_da.c b/drivers/tty/metag_da.c index 3774600741d8..9325262289f9 100644 --- a/drivers/tty/metag_da.c +++ b/drivers/tty/metag_da.c @@ -640,25 +640,7 @@ err_destroy_ports: put_tty_driver(channel_driver); return ret; } - -static void dashtty_exit(void) -{ - int nport; - struct dashtty_port *dport; - - del_timer_sync(&put_timer); - kthread_stop(dashtty_thread); - del_timer_sync(&poll_timer); - tty_unregister_driver(channel_driver); - for (nport = 0; nport < NUM_TTY_CHANNELS; nport++) { - dport = &dashtty_ports[nport]; - tty_port_destroy(&dport->port); - } - put_tty_driver(channel_driver); -} - -module_init(dashtty_init); -module_exit(dashtty_exit); +device_initcall(dashtty_init); #ifdef CONFIG_DA_CONSOLE diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 978204333c94..d75a66c72750 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -22,6 +22,7 @@ #include <linux/pm_runtime.h> #include <linux/console.h> #include <linux/pm_qos.h> +#include <linux/pm_wakeirq.h> #include <linux/dma-mapping.h> #include "8250.h" @@ -552,17 +553,6 @@ static void omap8250_uart_qos_work(struct work_struct *work) pm_qos_update_request(&priv->pm_qos_request, priv->latency); } -static irqreturn_t omap_wake_irq(int irq, void *dev_id) -{ - struct uart_port *port = dev_id; - int ret; - - ret = port->handle_irq(port); - if (ret) - return IRQ_HANDLED; - return IRQ_NONE; -} - #ifdef CONFIG_SERIAL_8250_DMA static int omap_8250_dma_handle_irq(struct uart_port *port); #endif @@ -596,11 +586,9 @@ static int omap_8250_startup(struct uart_port *port) int ret; if (priv->wakeirq) { - ret = request_irq(priv->wakeirq, omap_wake_irq, - port->irqflags, "uart wakeup irq", port); + ret = dev_pm_set_dedicated_wake_irq(port->dev, priv->wakeirq); if (ret) return ret; - disable_irq(priv->wakeirq); } pm_runtime_get_sync(port->dev); @@ -649,8 +637,7 @@ static int omap_8250_startup(struct uart_port *port) err: pm_runtime_mark_last_busy(port->dev); pm_runtime_put_autosuspend(port->dev); - if (priv->wakeirq) - free_irq(priv->wakeirq, port); + dev_pm_clear_wake_irq(port->dev); return ret; } @@ -682,10 +669,8 @@ static void omap_8250_shutdown(struct uart_port *port) pm_runtime_mark_last_busy(port->dev); pm_runtime_put_autosuspend(port->dev); - free_irq(port->irq, port); - if (priv->wakeirq) - free_irq(priv->wakeirq, port); + dev_pm_clear_wake_irq(port->dev); } static void omap_8250_throttle(struct uart_port *port) @@ -1226,31 +1211,6 @@ static int omap8250_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM - -static inline void omap8250_enable_wakeirq(struct omap8250_priv *priv, - bool enable) -{ - if (!priv->wakeirq) - return; - - if (enable) - enable_irq(priv->wakeirq); - else - disable_irq_nosync(priv->wakeirq); -} - -static void omap8250_enable_wakeup(struct omap8250_priv *priv, - bool enable) -{ - if (enable == priv->wakeups_enabled) - return; - - omap8250_enable_wakeirq(priv, enable); - priv->wakeups_enabled = enable; -} -#endif - #ifdef CONFIG_PM_SLEEP static int omap8250_prepare(struct device *dev) { @@ -1277,11 +1237,6 @@ static int omap8250_suspend(struct device *dev) serial8250_suspend_port(priv->line); flush_work(&priv->qos_work); - - if (device_may_wakeup(dev)) - omap8250_enable_wakeup(priv, true); - else - omap8250_enable_wakeup(priv, false); return 0; } @@ -1289,9 +1244,6 @@ static int omap8250_resume(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); - if (device_may_wakeup(dev)) - omap8250_enable_wakeup(priv, false); - serial8250_resume_port(priv->line); return 0; } @@ -1333,7 +1285,6 @@ static int omap8250_runtime_suspend(struct device *dev) return -EBUSY; } - omap8250_enable_wakeup(priv, true); if (up->dma) omap_8250_rx_dma(up, UART_IIR_RX_TIMEOUT); @@ -1354,7 +1305,6 @@ static int omap8250_runtime_resume(struct device *dev) return 0; up = serial8250_get_port(priv->line); - omap8250_enable_wakeup(priv, false); loss_cntx = omap8250_lost_context(up); if (loss_cntx) diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 7f49172ccd86..7a2172b5e93c 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -38,6 +38,7 @@ #include <linux/serial_core.h> #include <linux/irq.h> #include <linux/pm_runtime.h> +#include <linux/pm_wakeirq.h> #include <linux/of.h> #include <linux/of_irq.h> #include <linux/gpio.h> @@ -160,7 +161,6 @@ struct uart_omap_port { unsigned long port_activity; int context_loss_cnt; u32 errata; - u8 wakeups_enabled; u32 features; int rts_gpio; @@ -209,28 +209,11 @@ static int serial_omap_get_context_loss_count(struct uart_omap_port *up) return pdata->get_context_loss_count(up->dev); } -static inline void serial_omap_enable_wakeirq(struct uart_omap_port *up, - bool enable) -{ - if (!up->wakeirq) - return; - - if (enable) - enable_irq(up->wakeirq); - else - disable_irq_nosync(up->wakeirq); -} - +/* REVISIT: Remove this when omap3 boots in device tree only mode */ static void serial_omap_enable_wakeup(struct uart_omap_port *up, bool enable) { struct omap_uart_port_info *pdata = dev_get_platdata(up->dev); - if (enable == up->wakeups_enabled) - return; - - serial_omap_enable_wakeirq(up, enable); - up->wakeups_enabled = enable; - if (!pdata || !pdata->enable_wakeup) return; @@ -750,13 +733,11 @@ static int serial_omap_startup(struct uart_port *port) /* Optional wake-up IRQ */ if (up->wakeirq) { - retval = request_irq(up->wakeirq, serial_omap_irq, - up->port.irqflags, up->name, up); + retval = dev_pm_set_dedicated_wake_irq(up->dev, up->wakeirq); if (retval) { free_irq(up->port.irq, up); return retval; } - disable_irq(up->wakeirq); } dev_dbg(up->port.dev, "serial_omap_startup+%d\n", up->port.line); @@ -845,8 +826,7 @@ static void serial_omap_shutdown(struct uart_port *port) pm_runtime_mark_last_busy(up->dev); pm_runtime_put_autosuspend(up->dev); free_irq(up->port.irq, up); - if (up->wakeirq) - free_irq(up->wakeirq, up); + dev_pm_clear_wake_irq(up->dev); } static void serial_omap_uart_qos_work(struct work_struct *work) @@ -1139,13 +1119,6 @@ serial_omap_pm(struct uart_port *port, unsigned int state, serial_out(up, UART_EFR, efr); serial_out(up, UART_LCR, 0); - if (!device_may_wakeup(up->dev)) { - if (!state) - pm_runtime_forbid(up->dev); - else - pm_runtime_allow(up->dev); - } - pm_runtime_mark_last_busy(up->dev); pm_runtime_put_autosuspend(up->dev); } diff --git a/drivers/video/fbdev/omap2/dss/dss.c b/drivers/video/fbdev/omap2/dss/dss.c index 612b093831d5..9200a8668b49 100644 --- a/drivers/video/fbdev/omap2/dss/dss.c +++ b/drivers/video/fbdev/omap2/dss/dss.c @@ -1225,6 +1225,15 @@ static int dss_add_child_component(struct device *dev, void *data) { struct component_match **match = data; + /* + * HACK + * We don't have a working driver for rfbi, so skip it here always. + * Otherwise dss will never get probed successfully, as it will wait + * for rfbi to get probed. + */ + if (strstr(dev_name(dev), "rfbi")) + return 0; + component_match_add(dev->parent, match, dss_component_compare, dev); return 0; diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 64fa248343f6..8f84646f10e9 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -187,10 +187,10 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode, val_size2 = posix_acl_xattr_size(default_acl->a_count); err = -ENOMEM; - tmp_buf = kmalloc(max(val_size1, val_size2), GFP_NOFS); + tmp_buf = kmalloc(max(val_size1, val_size2), GFP_KERNEL); if (!tmp_buf) goto out_err; - pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_NOFS); + pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_KERNEL); if (!pagelist) goto out_err; ceph_pagelist_init(pagelist); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e162bcd105ee..890c50971a69 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -87,17 +87,21 @@ static int ceph_set_page_dirty(struct page *page) inode = mapping->host; ci = ceph_inode(inode); - /* - * Note that we're grabbing a snapc ref here without holding - * any locks! - */ - snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); - /* dirty the head */ spin_lock(&ci->i_ceph_lock); - if (ci->i_head_snapc == NULL) - ci->i_head_snapc = ceph_get_snap_context(snapc); - ++ci->i_wrbuffer_ref_head; + BUG_ON(ci->i_wr_ref == 0); // caller should hold Fw reference + if (__ceph_have_pending_cap_snap(ci)) { + struct ceph_cap_snap *capsnap = + list_last_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, + ci_item); + snapc = ceph_get_snap_context(capsnap->context); + capsnap->dirty_pages++; + } else { + BUG_ON(!ci->i_head_snapc); + snapc = ceph_get_snap_context(ci->i_head_snapc); + ++ci->i_wrbuffer_ref_head; + } if (ci->i_wrbuffer_ref == 0) ihold(inode); ++ci->i_wrbuffer_ref; @@ -346,7 +350,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) /* build page vector */ nr_pages = calc_pages_for(0, len); - pages = kmalloc(sizeof(*pages) * nr_pages, GFP_NOFS); + pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL); ret = -ENOMEM; if (!pages) goto out; @@ -358,7 +362,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) dout("start_read %p adding %p idx %lu\n", inode, page, page->index); if (add_to_page_cache_lru(page, &inode->i_data, page->index, - GFP_NOFS)) { + GFP_KERNEL)) { ceph_fscache_uncache_page(inode, page); page_cache_release(page); dout("start_read %p add_to_page_cache failed %p\n", @@ -436,7 +440,7 @@ out: * only snap context we are allowed to write back. */ static struct ceph_snap_context *get_oldest_context(struct inode *inode, - u64 *snap_size) + loff_t *snap_size) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_snap_context *snapc = NULL; @@ -476,8 +480,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) struct ceph_osd_client *osdc; struct ceph_snap_context *snapc, *oldest; loff_t page_off = page_offset(page); + loff_t snap_size = -1; long writeback_stat; - u64 truncate_size, snap_size = 0; + u64 truncate_size; u32 truncate_seq; int err = 0, len = PAGE_CACHE_SIZE; @@ -512,7 +517,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) spin_lock(&ci->i_ceph_lock); truncate_seq = ci->i_truncate_seq; truncate_size = ci->i_truncate_size; - if (!snap_size) + if (snap_size == -1) snap_size = i_size_read(inode); spin_unlock(&ci->i_ceph_lock); @@ -695,7 +700,8 @@ static int ceph_writepages_start(struct address_space *mapping, unsigned wsize = 1 << inode->i_blkbits; struct ceph_osd_request *req = NULL; int do_sync = 0; - u64 truncate_size, snap_size; + loff_t snap_size, i_size; + u64 truncate_size; u32 truncate_seq; /* @@ -741,7 +747,7 @@ static int ceph_writepages_start(struct address_space *mapping, retry: /* find oldest snap context with dirty data */ ceph_put_snap_context(snapc); - snap_size = 0; + snap_size = -1; snapc = get_oldest_context(inode, &snap_size); if (!snapc) { /* hmm, why does writepages get called when there @@ -749,16 +755,13 @@ retry: dout(" no snap context with dirty data?\n"); goto out; } - if (snap_size == 0) - snap_size = i_size_read(inode); dout(" oldest snapc is %p seq %lld (%d snaps)\n", snapc, snapc->seq, snapc->num_snaps); spin_lock(&ci->i_ceph_lock); truncate_seq = ci->i_truncate_seq; truncate_size = ci->i_truncate_size; - if (!snap_size) - snap_size = i_size_read(inode); + i_size = i_size_read(inode); spin_unlock(&ci->i_ceph_lock); if (last_snapc && snapc != last_snapc) { @@ -828,8 +831,10 @@ get_more_pages: dout("waiting on writeback %p\n", page); wait_on_page_writeback(page); } - if (page_offset(page) >= snap_size) { - dout("%p page eof %llu\n", page, snap_size); + if (page_offset(page) >= + (snap_size == -1 ? i_size : snap_size)) { + dout("%p page eof %llu\n", page, + (snap_size == -1 ? i_size : snap_size)); done = 1; unlock_page(page); break; @@ -884,7 +889,8 @@ get_more_pages: } if (do_sync) - osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); + osd_req_op_init(req, 1, + CEPH_OSD_OP_STARTSYNC, 0); req->r_callback = writepages_finish; req->r_inode = inode; @@ -944,10 +950,18 @@ get_more_pages: } /* Format the osd request message and submit the write */ - offset = page_offset(pages[0]); - len = min(snap_size - offset, - (u64)locked_pages << PAGE_CACHE_SHIFT); + len = (u64)locked_pages << PAGE_CACHE_SHIFT; + if (snap_size == -1) { + len = min(len, (u64)i_size_read(inode) - offset); + /* writepages_finish() clears writeback pages + * according to the data length, so make sure + * data length covers all locked pages */ + len = max(len, 1 + + ((u64)(locked_pages - 1) << PAGE_CACHE_SHIFT)); + } else { + len = min(len, snap_size - offset); + } dout("writepages got %d pages at %llu~%llu\n", locked_pages, offset, len); @@ -1032,7 +1046,6 @@ static int ceph_update_writeable_page(struct file *file, { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; loff_t page_off = pos & PAGE_CACHE_MASK; int pos_in_page = pos & ~PAGE_CACHE_MASK; int end_in_page = pos_in_page + len; @@ -1044,10 +1057,6 @@ retry_locked: /* writepages currently holds page lock, but if we change that later, */ wait_on_page_writeback(page); - /* check snap context */ - BUG_ON(!ci->i_snap_realm); - down_read(&mdsc->snap_rwsem); - BUG_ON(!ci->i_snap_realm->cached_context); snapc = page_snap_context(page); if (snapc && snapc != ci->i_head_snapc) { /* @@ -1055,7 +1064,6 @@ retry_locked: * context! is it writeable now? */ oldest = get_oldest_context(inode, NULL); - up_read(&mdsc->snap_rwsem); if (snapc->seq > oldest->seq) { ceph_put_snap_context(oldest); @@ -1112,7 +1120,6 @@ retry_locked: } /* we need to read it. */ - up_read(&mdsc->snap_rwsem); r = readpage_nounlock(file, page); if (r < 0) goto fail_nosnap; @@ -1157,16 +1164,13 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, /* * we don't do anything in here that simple_write_end doesn't do - * except adjust dirty page accounting and drop read lock on - * mdsc->snap_rwsem. + * except adjust dirty page accounting */ static int ceph_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { struct inode *inode = file_inode(file); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_mds_client *mdsc = fsc->mdsc; unsigned from = pos & (PAGE_CACHE_SIZE - 1); int check_cap = 0; @@ -1188,7 +1192,6 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, set_page_dirty(page); unlock_page(page); - up_read(&mdsc->snap_rwsem); page_cache_release(page); if (check_cap) @@ -1314,13 +1317,17 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_cap_flush *prealloc_cf; struct page *page = vmf->page; loff_t off = page_offset(page); loff_t size = i_size_read(inode); size_t len; int want, got, ret; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return VM_FAULT_SIGBUS; + if (ci->i_inline_version != CEPH_INLINE_NONE) { struct page *locked_page = NULL; if (off == 0) { @@ -1330,8 +1337,10 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = ceph_uninline_data(vma->vm_file, locked_page); if (locked_page) unlock_page(locked_page); - if (ret < 0) - return VM_FAULT_SIGBUS; + if (ret < 0) { + ret = VM_FAULT_SIGBUS; + goto out_free; + } } if (off + PAGE_CACHE_SIZE <= size) @@ -1353,7 +1362,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) break; if (ret != -ERESTARTSYS) { WARN_ON(1); - return VM_FAULT_SIGBUS; + ret = VM_FAULT_SIGBUS; + goto out_free; } } dout("page_mkwrite %p %llu~%zd got cap refs on %s\n", @@ -1373,7 +1383,6 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret == 0) { /* success. we'll keep the page locked. */ set_page_dirty(page); - up_read(&mdsc->snap_rwsem); ret = VM_FAULT_LOCKED; } else { if (ret == -ENOMEM) @@ -1389,7 +1398,8 @@ out: int dirty; spin_lock(&ci->i_ceph_lock); ci->i_inline_version = CEPH_INLINE_NONE; - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, + &prealloc_cf); spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); @@ -1398,6 +1408,8 @@ out: dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n", inode, off, len, ceph_cap_string(got), ret); ceph_put_cap_refs(ci, got); +out_free: + ceph_free_cap_flush(prealloc_cf); return ret; } @@ -1509,8 +1521,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ceph_vino(inode), 0, &len, 0, 1, CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, - ci->i_snap_realm->cached_context, - 0, 0, false); + ceph_empty_snapc, 0, 0, false); if (IS_ERR(req)) { err = PTR_ERR(req); goto out; @@ -1528,7 +1539,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ceph_vino(inode), 0, &len, 1, 3, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, - ci->i_snap_realm->cached_context, + ceph_empty_snapc, ci->i_truncate_seq, ci->i_truncate_size, false); if (IS_ERR(req)) { @@ -1597,3 +1608,206 @@ int ceph_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_ops = &ceph_vmops; return 0; } + +enum { + POOL_READ = 1, + POOL_WRITE = 2, +}; + +static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool) +{ + struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode); + struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_osd_request *rd_req = NULL, *wr_req = NULL; + struct rb_node **p, *parent; + struct ceph_pool_perm *perm; + struct page **pages; + int err = 0, err2 = 0, have = 0; + + down_read(&mdsc->pool_perm_rwsem); + p = &mdsc->pool_perm_tree.rb_node; + while (*p) { + perm = rb_entry(*p, struct ceph_pool_perm, node); + if (pool < perm->pool) + p = &(*p)->rb_left; + else if (pool > perm->pool) + p = &(*p)->rb_right; + else { + have = perm->perm; + break; + } + } + up_read(&mdsc->pool_perm_rwsem); + if (*p) + goto out; + + dout("__ceph_pool_perm_get pool %u no perm cached\n", pool); + + down_write(&mdsc->pool_perm_rwsem); + parent = NULL; + while (*p) { + parent = *p; + perm = rb_entry(parent, struct ceph_pool_perm, node); + if (pool < perm->pool) + p = &(*p)->rb_left; + else if (pool > perm->pool) + p = &(*p)->rb_right; + else { + have = perm->perm; + break; + } + } + if (*p) { + up_write(&mdsc->pool_perm_rwsem); + goto out; + } + + rd_req = ceph_osdc_alloc_request(&fsc->client->osdc, + ceph_empty_snapc, + 1, false, GFP_NOFS); + if (!rd_req) { + err = -ENOMEM; + goto out_unlock; + } + + rd_req->r_flags = CEPH_OSD_FLAG_READ; + osd_req_op_init(rd_req, 0, CEPH_OSD_OP_STAT, 0); + rd_req->r_base_oloc.pool = pool; + snprintf(rd_req->r_base_oid.name, sizeof(rd_req->r_base_oid.name), + "%llx.00000000", ci->i_vino.ino); + rd_req->r_base_oid.name_len = strlen(rd_req->r_base_oid.name); + + wr_req = ceph_osdc_alloc_request(&fsc->client->osdc, + ceph_empty_snapc, + 1, false, GFP_NOFS); + if (!wr_req) { + err = -ENOMEM; + goto out_unlock; + } + + wr_req->r_flags = CEPH_OSD_FLAG_WRITE | + CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK; + osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL); + wr_req->r_base_oloc.pool = pool; + wr_req->r_base_oid = rd_req->r_base_oid; + + /* one page should be large enough for STAT data */ + pages = ceph_alloc_page_vector(1, GFP_KERNEL); + if (IS_ERR(pages)) { + err = PTR_ERR(pages); + goto out_unlock; + } + + osd_req_op_raw_data_in_pages(rd_req, 0, pages, PAGE_SIZE, + 0, false, true); + ceph_osdc_build_request(rd_req, 0, NULL, CEPH_NOSNAP, + &ci->vfs_inode.i_mtime); + err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false); + + ceph_osdc_build_request(wr_req, 0, NULL, CEPH_NOSNAP, + &ci->vfs_inode.i_mtime); + err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false); + + if (!err) + err = ceph_osdc_wait_request(&fsc->client->osdc, rd_req); + if (!err2) + err2 = ceph_osdc_wait_request(&fsc->client->osdc, wr_req); + + if (err >= 0 || err == -ENOENT) + have |= POOL_READ; + else if (err != -EPERM) + goto out_unlock; + + if (err2 == 0 || err2 == -EEXIST) + have |= POOL_WRITE; + else if (err2 != -EPERM) { + err = err2; + goto out_unlock; + } + + perm = kmalloc(sizeof(*perm), GFP_NOFS); + if (!perm) { + err = -ENOMEM; + goto out_unlock; + } + + perm->pool = pool; + perm->perm = have; + rb_link_node(&perm->node, parent, p); + rb_insert_color(&perm->node, &mdsc->pool_perm_tree); + err = 0; +out_unlock: + up_write(&mdsc->pool_perm_rwsem); + + if (rd_req) + ceph_osdc_put_request(rd_req); + if (wr_req) + ceph_osdc_put_request(wr_req); +out: + if (!err) + err = have; + dout("__ceph_pool_perm_get pool %u result = %d\n", pool, err); + return err; +} + +int ceph_pool_perm_check(struct ceph_inode_info *ci, int need) +{ + u32 pool; + int ret, flags; + + if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode), + NOPOOLPERM)) + return 0; + + spin_lock(&ci->i_ceph_lock); + flags = ci->i_ceph_flags; + pool = ceph_file_layout_pg_pool(ci->i_layout); + spin_unlock(&ci->i_ceph_lock); +check: + if (flags & CEPH_I_POOL_PERM) { + if ((need & CEPH_CAP_FILE_RD) && !(flags & CEPH_I_POOL_RD)) { + dout("ceph_pool_perm_check pool %u no read perm\n", + pool); + return -EPERM; + } + if ((need & CEPH_CAP_FILE_WR) && !(flags & CEPH_I_POOL_WR)) { + dout("ceph_pool_perm_check pool %u no write perm\n", + pool); + return -EPERM; + } + return 0; + } + + ret = __ceph_pool_perm_get(ci, pool); + if (ret < 0) + return ret; + + flags = CEPH_I_POOL_PERM; + if (ret & POOL_READ) + flags |= CEPH_I_POOL_RD; + if (ret & POOL_WRITE) + flags |= CEPH_I_POOL_WR; + + spin_lock(&ci->i_ceph_lock); + if (pool == ceph_file_layout_pg_pool(ci->i_layout)) { + ci->i_ceph_flags = flags; + } else { + pool = ceph_file_layout_pg_pool(ci->i_layout); + flags = ci->i_ceph_flags; + } + spin_unlock(&ci->i_ceph_lock); + goto check; +} + +void ceph_pool_perm_destroy(struct ceph_mds_client *mdsc) +{ + struct ceph_pool_perm *perm; + struct rb_node *n; + + while (!RB_EMPTY_ROOT(&mdsc->pool_perm_tree)) { + n = rb_first(&mdsc->pool_perm_tree); + perm = rb_entry(n, struct ceph_pool_perm, node); + rb_erase(n, &mdsc->pool_perm_tree); + kfree(perm); + } +} diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index be5ea6af8366..dc10c9dd36c1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -833,7 +833,9 @@ int __ceph_caps_used(struct ceph_inode_info *ci) used |= CEPH_CAP_PIN; if (ci->i_rd_ref) used |= CEPH_CAP_FILE_RD; - if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages) + if (ci->i_rdcache_ref || + (!S_ISDIR(ci->vfs_inode.i_mode) && /* ignore readdir cache */ + ci->vfs_inode.i_data.nrpages)) used |= CEPH_CAP_FILE_CACHE; if (ci->i_wr_ref) used |= CEPH_CAP_FILE_WR; @@ -926,16 +928,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) /* remove from session list */ spin_lock(&session->s_cap_lock); - /* - * s_cap_reconnect is protected by s_cap_lock. no one changes - * s_cap_gen while session is in the reconnect state. - */ - if (queue_release && - (!session->s_cap_reconnect || - cap->cap_gen == session->s_cap_gen)) - __queue_cap_release(session, ci->i_vino.ino, cap->cap_id, - cap->mseq, cap->issue_seq); - if (session->s_cap_iterator == cap) { /* not yet, we are iterating over this very cap */ dout("__ceph_remove_cap delaying %p removal from session %p\n", @@ -948,6 +940,25 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) } /* protect backpointer with s_cap_lock: see iterate_session_caps */ cap->ci = NULL; + + /* + * s_cap_reconnect is protected by s_cap_lock. no one changes + * s_cap_gen while session is in the reconnect state. + */ + if (queue_release && + (!session->s_cap_reconnect || cap->cap_gen == session->s_cap_gen)) { + cap->queue_release = 1; + if (removed) { + list_add_tail(&cap->session_caps, + &session->s_cap_releases); + session->s_num_cap_releases++; + removed = 0; + } + } else { + cap->queue_release = 0; + } + cap->cap_ino = ci->i_vino.ino; + spin_unlock(&session->s_cap_lock); /* remove from inode list */ @@ -977,8 +988,8 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) static int send_cap_msg(struct ceph_mds_session *session, u64 ino, u64 cid, int op, int caps, int wanted, int dirty, - u32 seq, u64 flush_tid, u32 issue_seq, u32 mseq, - u64 size, u64 max_size, + u32 seq, u64 flush_tid, u64 oldest_flush_tid, + u32 issue_seq, u32 mseq, u64 size, u64 max_size, struct timespec *mtime, struct timespec *atime, u64 time_warp_seq, kuid_t uid, kgid_t gid, umode_t mode, @@ -992,20 +1003,23 @@ static int send_cap_msg(struct ceph_mds_session *session, size_t extra_len; dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" - " seq %u/%u mseq %u follows %lld size %llu/%llu" + " seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu" " xattr_ver %llu xattr_len %d\n", ceph_cap_op_name(op), cid, ino, ceph_cap_string(caps), ceph_cap_string(wanted), ceph_cap_string(dirty), - seq, issue_seq, mseq, follows, size, max_size, + seq, issue_seq, flush_tid, oldest_flush_tid, + mseq, follows, size, max_size, xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); - /* flock buffer size + inline version + inline data size */ - extra_len = 4 + 8 + 4; + /* flock buffer size + inline version + inline data size + + * osd_epoch_barrier + oldest_flush_tid */ + extra_len = 4 + 8 + 4 + 4 + 8; msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len, GFP_NOFS, false); if (!msg) return -ENOMEM; + msg->hdr.version = cpu_to_le16(6); msg->hdr.tid = cpu_to_le64(flush_tid); fc = msg->front.iov_base; @@ -1041,6 +1055,10 @@ static int send_cap_msg(struct ceph_mds_session *session, ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE); /* inline data size */ ceph_encode_32(&p, 0); + /* osd_epoch_barrier */ + ceph_encode_32(&p, 0); + /* oldest_flush_tid */ + ceph_encode_64(&p, oldest_flush_tid); fc->xattr_version = cpu_to_le64(xattr_version); if (xattrs_buf) { @@ -1053,44 +1071,6 @@ static int send_cap_msg(struct ceph_mds_session *session, return 0; } -void __queue_cap_release(struct ceph_mds_session *session, - u64 ino, u64 cap_id, u32 migrate_seq, - u32 issue_seq) -{ - struct ceph_msg *msg; - struct ceph_mds_cap_release *head; - struct ceph_mds_cap_item *item; - - BUG_ON(!session->s_num_cap_releases); - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, list_head); - - dout(" adding %llx release to mds%d msg %p (%d left)\n", - ino, session->s_mds, msg, session->s_num_cap_releases); - - BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); - head = msg->front.iov_base; - le32_add_cpu(&head->num, 1); - item = msg->front.iov_base + msg->front.iov_len; - item->ino = cpu_to_le64(ino); - item->cap_id = cpu_to_le64(cap_id); - item->migrate_seq = cpu_to_le32(migrate_seq); - item->seq = cpu_to_le32(issue_seq); - - session->s_num_cap_releases--; - - msg->front.iov_len += sizeof(*item); - if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { - dout(" release msg %p full\n", msg); - list_move_tail(&msg->list_head, &session->s_cap_releases_done); - } else { - dout(" release msg %p at %d/%d (%d)\n", msg, - (int)le32_to_cpu(head->num), - (int)CEPH_CAPS_PER_RELEASE, - (int)msg->front.iov_len); - } -} - /* * Queue cap releases when an inode is dropped from our cache. Since * inode is about to be destroyed, there is no need for i_ceph_lock. @@ -1127,7 +1107,7 @@ void ceph_queue_caps_release(struct inode *inode) */ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, int op, int used, int want, int retain, int flushing, - unsigned *pflush_tid) + u64 flush_tid, u64 oldest_flush_tid) __releases(cap->ci->i_ceph_lock) { struct ceph_inode_info *ci = cap->ci; @@ -1145,8 +1125,6 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, u64 xattr_version = 0; struct ceph_buffer *xattr_blob = NULL; int delayed = 0; - u64 flush_tid = 0; - int i; int ret; bool inline_data; @@ -1190,26 +1168,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, cap->implemented &= cap->issued | used; cap->mds_wanted = want; - if (flushing) { - /* - * assign a tid for flush operations so we can avoid - * flush1 -> dirty1 -> flush2 -> flushack1 -> mark - * clean type races. track latest tid for every bit - * so we can handle flush AxFw, flush Fw, and have the - * first ack clean Ax. - */ - flush_tid = ++ci->i_cap_flush_last_tid; - if (pflush_tid) - *pflush_tid = flush_tid; - dout(" cap_flush_tid %d\n", (int)flush_tid); - for (i = 0; i < CEPH_CAP_BITS; i++) - if (flushing & (1 << i)) - ci->i_cap_flush_tid[i] = flush_tid; - - follows = ci->i_head_snapc->seq; - } else { - follows = 0; - } + follows = flushing ? ci->i_head_snapc->seq : 0; keep = cap->implemented; seq = cap->seq; @@ -1237,7 +1196,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, spin_unlock(&ci->i_ceph_lock); ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, - op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, + op, keep, want, flushing, seq, + flush_tid, oldest_flush_tid, issue_seq, mseq, size, max_size, &mtime, &atime, time_warp_seq, uid, gid, mode, xattr_version, xattr_blob, follows, inline_data); @@ -1259,14 +1219,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, * asynchronously back to the MDS once sync writes complete and dirty * data is written out. * - * Unless @again is true, skip cap_snaps that were already sent to + * Unless @kick is true, skip cap_snaps that were already sent to * the MDS (i.e., during this session). * * Called under i_ceph_lock. Takes s_mutex as needed. */ void __ceph_flush_snaps(struct ceph_inode_info *ci, struct ceph_mds_session **psession, - int again) + int kick) __releases(ci->i_ceph_lock) __acquires(ci->i_ceph_lock) { @@ -1297,11 +1257,8 @@ retry: if (capsnap->dirty_pages || capsnap->writing) break; - /* - * if cap writeback already occurred, we should have dropped - * the capsnap in ceph_put_wrbuffer_cap_refs. - */ - BUG_ON(capsnap->dirty == 0); + /* should be removed by ceph_try_drop_cap_snap() */ + BUG_ON(!capsnap->need_flush); /* pick mds, take s_mutex */ if (ci->i_auth_cap == NULL) { @@ -1310,7 +1267,7 @@ retry: } /* only flush each capsnap once */ - if (!again && !list_empty(&capsnap->flushing_item)) { + if (!kick && !list_empty(&capsnap->flushing_item)) { dout("already flushed %p, skipping\n", capsnap); continue; } @@ -1320,6 +1277,9 @@ retry: if (session && session->s_mds != mds) { dout("oops, wrong session %p mutex\n", session); + if (kick) + goto out; + mutex_unlock(&session->s_mutex); ceph_put_mds_session(session); session = NULL; @@ -1343,20 +1303,22 @@ retry: goto retry; } - capsnap->flush_tid = ++ci->i_cap_flush_last_tid; + spin_lock(&mdsc->cap_dirty_lock); + capsnap->flush_tid = ++mdsc->last_cap_flush_tid; + spin_unlock(&mdsc->cap_dirty_lock); + atomic_inc(&capsnap->nref); - if (!list_empty(&capsnap->flushing_item)) - list_del_init(&capsnap->flushing_item); - list_add_tail(&capsnap->flushing_item, - &session->s_cap_snaps_flushing); + if (list_empty(&capsnap->flushing_item)) + list_add_tail(&capsnap->flushing_item, + &session->s_cap_snaps_flushing); spin_unlock(&ci->i_ceph_lock); dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", inode, capsnap, capsnap->follows, capsnap->flush_tid); send_cap_msg(session, ceph_vino(inode).ino, 0, CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, - capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, - capsnap->size, 0, + capsnap->dirty, 0, capsnap->flush_tid, 0, + 0, mseq, capsnap->size, 0, &capsnap->mtime, &capsnap->atime, capsnap->time_warp_seq, capsnap->uid, capsnap->gid, capsnap->mode, @@ -1396,7 +1358,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) * Caller is then responsible for calling __mark_inode_dirty with the * returned flags value. */ -int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) +int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, + struct ceph_cap_flush **pcf) { struct ceph_mds_client *mdsc = ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; @@ -1416,9 +1379,14 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) ceph_cap_string(was | mask)); ci->i_dirty_caps |= mask; if (was == 0) { - if (!ci->i_head_snapc) + WARN_ON_ONCE(ci->i_prealloc_cap_flush); + swap(ci->i_prealloc_cap_flush, *pcf); + + if (!ci->i_head_snapc) { + WARN_ON_ONCE(!rwsem_is_locked(&mdsc->snap_rwsem)); ci->i_head_snapc = ceph_get_snap_context( ci->i_snap_realm->cached_context); + } dout(" inode %p now dirty snapc %p auth cap %p\n", &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap); BUG_ON(!list_empty(&ci->i_dirty_item)); @@ -1429,6 +1397,8 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) ihold(inode); dirty |= I_DIRTY_SYNC; } + } else { + WARN_ON_ONCE(!ci->i_prealloc_cap_flush); } BUG_ON(list_empty(&ci->i_dirty_item)); if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) && @@ -1438,6 +1408,74 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) return dirty; } +static void __add_cap_flushing_to_inode(struct ceph_inode_info *ci, + struct ceph_cap_flush *cf) +{ + struct rb_node **p = &ci->i_cap_flush_tree.rb_node; + struct rb_node *parent = NULL; + struct ceph_cap_flush *other = NULL; + + while (*p) { + parent = *p; + other = rb_entry(parent, struct ceph_cap_flush, i_node); + + if (cf->tid < other->tid) + p = &(*p)->rb_left; + else if (cf->tid > other->tid) + p = &(*p)->rb_right; + else + BUG(); + } + + rb_link_node(&cf->i_node, parent, p); + rb_insert_color(&cf->i_node, &ci->i_cap_flush_tree); +} + +static void __add_cap_flushing_to_mdsc(struct ceph_mds_client *mdsc, + struct ceph_cap_flush *cf) +{ + struct rb_node **p = &mdsc->cap_flush_tree.rb_node; + struct rb_node *parent = NULL; + struct ceph_cap_flush *other = NULL; + + while (*p) { + parent = *p; + other = rb_entry(parent, struct ceph_cap_flush, g_node); + + if (cf->tid < other->tid) + p = &(*p)->rb_left; + else if (cf->tid > other->tid) + p = &(*p)->rb_right; + else + BUG(); + } + + rb_link_node(&cf->g_node, parent, p); + rb_insert_color(&cf->g_node, &mdsc->cap_flush_tree); +} + +struct ceph_cap_flush *ceph_alloc_cap_flush(void) +{ + return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL); +} + +void ceph_free_cap_flush(struct ceph_cap_flush *cf) +{ + if (cf) + kmem_cache_free(ceph_cap_flush_cachep, cf); +} + +static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc) +{ + struct rb_node *n = rb_first(&mdsc->cap_flush_tree); + if (n) { + struct ceph_cap_flush *cf = + rb_entry(n, struct ceph_cap_flush, g_node); + return cf->tid; + } + return 0; +} + /* * Add dirty inode to the flushing list. Assigned a seq number so we * can wait for caps to flush without starving. @@ -1445,14 +1483,17 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) * Called under i_ceph_lock. */ static int __mark_caps_flushing(struct inode *inode, - struct ceph_mds_session *session) + struct ceph_mds_session *session, + u64 *flush_tid, u64 *oldest_flush_tid) { struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_cap_flush *cf = NULL; int flushing; BUG_ON(ci->i_dirty_caps == 0); BUG_ON(list_empty(&ci->i_dirty_item)); + BUG_ON(!ci->i_prealloc_cap_flush); flushing = ci->i_dirty_caps; dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n", @@ -1463,22 +1504,31 @@ static int __mark_caps_flushing(struct inode *inode, ci->i_dirty_caps = 0; dout(" inode %p now !dirty\n", inode); + swap(cf, ci->i_prealloc_cap_flush); + cf->caps = flushing; + cf->kick = false; + spin_lock(&mdsc->cap_dirty_lock); list_del_init(&ci->i_dirty_item); + cf->tid = ++mdsc->last_cap_flush_tid; + __add_cap_flushing_to_mdsc(mdsc, cf); + *oldest_flush_tid = __get_oldest_flush_tid(mdsc); + if (list_empty(&ci->i_flushing_item)) { - ci->i_cap_flush_seq = ++mdsc->cap_flush_seq; list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing); mdsc->num_cap_flushing++; - dout(" inode %p now flushing seq %lld\n", inode, - ci->i_cap_flush_seq); + dout(" inode %p now flushing tid %llu\n", inode, cf->tid); } else { list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing); - dout(" inode %p now flushing (more) seq %lld\n", inode, - ci->i_cap_flush_seq); + dout(" inode %p now flushing (more) tid %llu\n", + inode, cf->tid); } spin_unlock(&mdsc->cap_dirty_lock); + __add_cap_flushing_to_inode(ci, cf); + + *flush_tid = cf->tid; return flushing; } @@ -1524,6 +1574,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = &ci->vfs_inode; struct ceph_cap *cap; + u64 flush_tid, oldest_flush_tid; int file_wanted, used, cap_used; int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ int issued, implemented, want, retain, revoking, flushing = 0; @@ -1553,13 +1604,13 @@ retry: retry_locked: file_wanted = __ceph_caps_file_wanted(ci); used = __ceph_caps_used(ci); - want = file_wanted | used; issued = __ceph_caps_issued(ci, &implemented); revoking = implemented & ~issued; - retain = want | CEPH_CAP_PIN; + want = file_wanted; + retain = file_wanted | used | CEPH_CAP_PIN; if (!mdsc->stopping && inode->i_nlink > 0) { - if (want) { + if (file_wanted) { retain |= CEPH_CAP_ANY; /* be greedy */ } else if (S_ISDIR(inode->i_mode) && (issued & CEPH_CAP_FILE_SHARED) && @@ -1602,9 +1653,10 @@ retry_locked: * If we fail, it's because pages are locked.... try again later. */ if ((!is_delayed || mdsc->stopping) && - ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ - inode->i_data.nrpages && /* have cached pages */ - (file_wanted == 0 || /* no open files */ + !S_ISDIR(inode->i_mode) && /* ignore readdir cache */ + ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ + inode->i_data.nrpages && /* have cached pages */ + (file_wanted == 0 || /* no open files */ (revoking & (CEPH_CAP_FILE_CACHE| CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ !tried_invalidate) { @@ -1742,17 +1794,25 @@ ack: took_snap_rwsem = 1; } - if (cap == ci->i_auth_cap && ci->i_dirty_caps) - flushing = __mark_caps_flushing(inode, session); - else + if (cap == ci->i_auth_cap && ci->i_dirty_caps) { + flushing = __mark_caps_flushing(inode, session, + &flush_tid, + &oldest_flush_tid); + } else { flushing = 0; + flush_tid = 0; + spin_lock(&mdsc->cap_dirty_lock); + oldest_flush_tid = __get_oldest_flush_tid(mdsc); + spin_unlock(&mdsc->cap_dirty_lock); + } mds = cap->mds; /* remember mds, so we don't repeat */ sent++; /* __send_cap drops i_ceph_lock */ delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used, - want, retain, flushing, NULL); + want, retain, flushing, + flush_tid, oldest_flush_tid); goto retry; /* retake i_ceph_lock and restart our cap scan. */ } @@ -1781,12 +1841,13 @@ ack: /* * Try to flush dirty caps back to the auth mds. */ -static int try_flush_caps(struct inode *inode, unsigned *flush_tid) +static int try_flush_caps(struct inode *inode, u64 *ptid) { struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); - int flushing = 0; struct ceph_mds_session *session = NULL; + int flushing = 0; + u64 flush_tid = 0, oldest_flush_tid = 0; retry: spin_lock(&ci->i_ceph_lock); @@ -1811,42 +1872,54 @@ retry: if (cap->session->s_state < CEPH_MDS_SESSION_OPEN) goto out; - flushing = __mark_caps_flushing(inode, session); + flushing = __mark_caps_flushing(inode, session, &flush_tid, + &oldest_flush_tid); /* __send_cap drops i_ceph_lock */ delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want, - cap->issued | cap->implemented, flushing, - flush_tid); - if (!delayed) - goto out_unlocked; + (cap->issued | cap->implemented), + flushing, flush_tid, oldest_flush_tid); - spin_lock(&ci->i_ceph_lock); - __cap_delay_requeue(mdsc, ci); + if (delayed) { + spin_lock(&ci->i_ceph_lock); + __cap_delay_requeue(mdsc, ci); + spin_unlock(&ci->i_ceph_lock); + } + } else { + struct rb_node *n = rb_last(&ci->i_cap_flush_tree); + if (n) { + struct ceph_cap_flush *cf = + rb_entry(n, struct ceph_cap_flush, i_node); + flush_tid = cf->tid; + } + flushing = ci->i_flushing_caps; + spin_unlock(&ci->i_ceph_lock); } out: - spin_unlock(&ci->i_ceph_lock); -out_unlocked: if (session) mutex_unlock(&session->s_mutex); + + *ptid = flush_tid; return flushing; } /* * Return true if we've flushed caps through the given flush_tid. */ -static int caps_are_flushed(struct inode *inode, unsigned tid) +static int caps_are_flushed(struct inode *inode, u64 flush_tid) { struct ceph_inode_info *ci = ceph_inode(inode); - int i, ret = 1; + struct ceph_cap_flush *cf; + struct rb_node *n; + int ret = 1; spin_lock(&ci->i_ceph_lock); - for (i = 0; i < CEPH_CAP_BITS; i++) - if ((ci->i_flushing_caps & (1 << i)) && - ci->i_cap_flush_tid[i] <= tid) { - /* still flushing this bit */ + n = rb_first(&ci->i_cap_flush_tree); + if (n) { + cf = rb_entry(n, struct ceph_cap_flush, i_node); + if (cf->tid <= flush_tid) ret = 0; - break; - } + } spin_unlock(&ci->i_ceph_lock); return ret; } @@ -1864,13 +1937,16 @@ static void sync_write_wait(struct inode *inode) struct ceph_osd_request *req; u64 last_tid; + if (!S_ISREG(inode->i_mode)) + return; + spin_lock(&ci->i_unsafe_lock); if (list_empty(head)) goto out; /* set upper bound as _last_ entry in chain */ - req = list_entry(head->prev, struct ceph_osd_request, - r_unsafe_item); + req = list_last_entry(head, struct ceph_osd_request, + r_unsafe_item); last_tid = req->r_tid; do { @@ -1888,18 +1964,64 @@ static void sync_write_wait(struct inode *inode) */ if (list_empty(head)) break; - req = list_entry(head->next, struct ceph_osd_request, - r_unsafe_item); + req = list_first_entry(head, struct ceph_osd_request, + r_unsafe_item); } while (req->r_tid < last_tid); out: spin_unlock(&ci->i_unsafe_lock); } +/* + * wait for any uncommitted directory operations to commit. + */ +static int unsafe_dirop_wait(struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct list_head *head = &ci->i_unsafe_dirops; + struct ceph_mds_request *req; + u64 last_tid; + int ret = 0; + + if (!S_ISDIR(inode->i_mode)) + return 0; + + spin_lock(&ci->i_unsafe_lock); + if (list_empty(head)) + goto out; + + req = list_last_entry(head, struct ceph_mds_request, + r_unsafe_dir_item); + last_tid = req->r_tid; + + do { + ceph_mdsc_get_request(req); + spin_unlock(&ci->i_unsafe_lock); + + dout("unsafe_dirop_wait %p wait on tid %llu (until %llu)\n", + inode, req->r_tid, last_tid); + ret = !wait_for_completion_timeout(&req->r_safe_completion, + ceph_timeout_jiffies(req->r_timeout)); + if (ret) + ret = -EIO; /* timed out */ + + ceph_mdsc_put_request(req); + + spin_lock(&ci->i_unsafe_lock); + if (ret || list_empty(head)) + break; + req = list_first_entry(head, struct ceph_mds_request, + r_unsafe_dir_item); + } while (req->r_tid < last_tid); +out: + spin_unlock(&ci->i_unsafe_lock); + return ret; +} + int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); - unsigned flush_tid; + u64 flush_tid; int ret; int dirty; @@ -1908,25 +2030,30 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret < 0) - return ret; + goto out; + + if (datasync) + goto out; + mutex_lock(&inode->i_mutex); dirty = try_flush_caps(inode, &flush_tid); dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); + ret = unsafe_dirop_wait(inode); + /* * only wait on non-file metadata writeback (the mds * can recover size and mtime, so we don't need to * wait for that) */ - if (!datasync && (dirty & ~CEPH_CAP_ANY_FILE_WR)) { - dout("fsync waiting for flush_tid %u\n", flush_tid); + if (!ret && (dirty & ~CEPH_CAP_ANY_FILE_WR)) { ret = wait_event_interruptible(ci->i_cap_wq, - caps_are_flushed(inode, flush_tid)); + caps_are_flushed(inode, flush_tid)); } - - dout("fsync %p%s done\n", inode, datasync ? " datasync" : ""); mutex_unlock(&inode->i_mutex); +out: + dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); return ret; } @@ -1939,7 +2066,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) { struct ceph_inode_info *ci = ceph_inode(inode); - unsigned flush_tid; + u64 flush_tid; int err = 0; int dirty; int wait = wbc->sync_mode == WB_SYNC_ALL; @@ -1994,6 +2121,104 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, } } +static int __kick_flushing_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct ceph_inode_info *ci, + bool kick_all) +{ + struct inode *inode = &ci->vfs_inode; + struct ceph_cap *cap; + struct ceph_cap_flush *cf; + struct rb_node *n; + int delayed = 0; + u64 first_tid = 0; + u64 oldest_flush_tid; + + spin_lock(&mdsc->cap_dirty_lock); + oldest_flush_tid = __get_oldest_flush_tid(mdsc); + spin_unlock(&mdsc->cap_dirty_lock); + + while (true) { + spin_lock(&ci->i_ceph_lock); + cap = ci->i_auth_cap; + if (!(cap && cap->session == session)) { + pr_err("%p auth cap %p not mds%d ???\n", inode, + cap, session->s_mds); + spin_unlock(&ci->i_ceph_lock); + break; + } + + for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) { + cf = rb_entry(n, struct ceph_cap_flush, i_node); + if (cf->tid < first_tid) + continue; + if (kick_all || cf->kick) + break; + } + if (!n) { + spin_unlock(&ci->i_ceph_lock); + break; + } + + cf = rb_entry(n, struct ceph_cap_flush, i_node); + cf->kick = false; + + first_tid = cf->tid + 1; + + dout("kick_flushing_caps %p cap %p tid %llu %s\n", inode, + cap, cf->tid, ceph_cap_string(cf->caps)); + delayed |= __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, + __ceph_caps_used(ci), + __ceph_caps_wanted(ci), + cap->issued | cap->implemented, + cf->caps, cf->tid, oldest_flush_tid); + } + return delayed; +} + +void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session) +{ + struct ceph_inode_info *ci; + struct ceph_cap *cap; + struct ceph_cap_flush *cf; + struct rb_node *n; + + dout("early_kick_flushing_caps mds%d\n", session->s_mds); + list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { + spin_lock(&ci->i_ceph_lock); + cap = ci->i_auth_cap; + if (!(cap && cap->session == session)) { + pr_err("%p auth cap %p not mds%d ???\n", + &ci->vfs_inode, cap, session->s_mds); + spin_unlock(&ci->i_ceph_lock); + continue; + } + + + /* + * if flushing caps were revoked, we re-send the cap flush + * in client reconnect stage. This guarantees MDS * processes + * the cap flush message before issuing the flushing caps to + * other client. + */ + if ((cap->issued & ci->i_flushing_caps) != + ci->i_flushing_caps) { + spin_unlock(&ci->i_ceph_lock); + if (!__kick_flushing_caps(mdsc, session, ci, true)) + continue; + spin_lock(&ci->i_ceph_lock); + } + + for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) { + cf = rb_entry(n, struct ceph_cap_flush, i_node); + cf->kick = true; + } + + spin_unlock(&ci->i_ceph_lock); + } +} + void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { @@ -2003,28 +2228,10 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, dout("kick_flushing_caps mds%d\n", session->s_mds); list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { - struct inode *inode = &ci->vfs_inode; - struct ceph_cap *cap; - int delayed = 0; - - spin_lock(&ci->i_ceph_lock); - cap = ci->i_auth_cap; - if (cap && cap->session == session) { - dout("kick_flushing_caps %p cap %p %s\n", inode, - cap, ceph_cap_string(ci->i_flushing_caps)); - delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, - __ceph_caps_used(ci), - __ceph_caps_wanted(ci), - cap->issued | cap->implemented, - ci->i_flushing_caps, NULL); - if (delayed) { - spin_lock(&ci->i_ceph_lock); - __cap_delay_requeue(mdsc, ci); - spin_unlock(&ci->i_ceph_lock); - } - } else { - pr_err("%p auth cap %p not mds%d ???\n", inode, - cap, session->s_mds); + int delayed = __kick_flushing_caps(mdsc, session, ci, false); + if (delayed) { + spin_lock(&ci->i_ceph_lock); + __cap_delay_requeue(mdsc, ci); spin_unlock(&ci->i_ceph_lock); } } @@ -2036,26 +2243,25 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap *cap; - int delayed = 0; spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; - dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, - ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); + dout("kick_flushing_inode_caps %p flushing %s\n", inode, + ceph_cap_string(ci->i_flushing_caps)); __ceph_flush_snaps(ci, &session, 1); if (ci->i_flushing_caps) { + int delayed; + spin_lock(&mdsc->cap_dirty_lock); list_move_tail(&ci->i_flushing_item, &cap->session->s_cap_flushing); spin_unlock(&mdsc->cap_dirty_lock); - delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, - __ceph_caps_used(ci), - __ceph_caps_wanted(ci), - cap->issued | cap->implemented, - ci->i_flushing_caps, NULL); + spin_unlock(&ci->i_ceph_lock); + + delayed = __kick_flushing_caps(mdsc, session, ci, true); if (delayed) { spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(mdsc, ci); @@ -2073,7 +2279,8 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, * * Protected by i_ceph_lock. */ -static void __take_cap_refs(struct ceph_inode_info *ci, int got) +static void __take_cap_refs(struct ceph_inode_info *ci, int got, + bool snap_rwsem_locked) { if (got & CEPH_CAP_PIN) ci->i_pin_ref++; @@ -2081,8 +2288,14 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got) ci->i_rd_ref++; if (got & CEPH_CAP_FILE_CACHE) ci->i_rdcache_ref++; - if (got & CEPH_CAP_FILE_WR) + if (got & CEPH_CAP_FILE_WR) { + if (ci->i_wr_ref == 0 && !ci->i_head_snapc) { + BUG_ON(!snap_rwsem_locked); + ci->i_head_snapc = ceph_get_snap_context( + ci->i_snap_realm->cached_context); + } ci->i_wr_ref++; + } if (got & CEPH_CAP_FILE_BUFFER) { if (ci->i_wb_ref == 0) ihold(&ci->vfs_inode); @@ -2100,16 +2313,19 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got) * requested from the MDS. */ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, - loff_t endoff, int *got, int *check_max, int *err) + loff_t endoff, bool nonblock, int *got, int *err) { struct inode *inode = &ci->vfs_inode; + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; int ret = 0; int have, implemented; int file_wanted; + bool snap_rwsem_locked = false; dout("get_cap_refs %p need %s want %s\n", inode, ceph_cap_string(need), ceph_cap_string(want)); +again: spin_lock(&ci->i_ceph_lock); /* make sure file is actually open */ @@ -2125,6 +2341,10 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, /* finish pending truncate */ while (ci->i_truncate_pending) { spin_unlock(&ci->i_ceph_lock); + if (snap_rwsem_locked) { + up_read(&mdsc->snap_rwsem); + snap_rwsem_locked = false; + } __ceph_do_pending_vmtruncate(inode); spin_lock(&ci->i_ceph_lock); } @@ -2136,7 +2356,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, dout("get_cap_refs %p endoff %llu > maxsize %llu\n", inode, endoff, ci->i_max_size); if (endoff > ci->i_requested_max_size) { - *check_max = 1; + *err = -EAGAIN; ret = 1; } goto out_unlock; @@ -2164,8 +2384,29 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, inode, ceph_cap_string(have), ceph_cap_string(not), ceph_cap_string(revoking)); if ((revoking & not) == 0) { + if (!snap_rwsem_locked && + !ci->i_head_snapc && + (need & CEPH_CAP_FILE_WR)) { + if (!down_read_trylock(&mdsc->snap_rwsem)) { + /* + * we can not call down_read() when + * task isn't in TASK_RUNNING state + */ + if (nonblock) { + *err = -EAGAIN; + ret = 1; + goto out_unlock; + } + + spin_unlock(&ci->i_ceph_lock); + down_read(&mdsc->snap_rwsem); + snap_rwsem_locked = true; + goto again; + } + snap_rwsem_locked = true; + } *got = need | (have & want); - __take_cap_refs(ci, *got); + __take_cap_refs(ci, *got, true); ret = 1; } } else { @@ -2189,6 +2430,8 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, } out_unlock: spin_unlock(&ci->i_ceph_lock); + if (snap_rwsem_locked) + up_read(&mdsc->snap_rwsem); dout("get_cap_refs %p ret %d got %s\n", inode, ret, ceph_cap_string(*got)); @@ -2231,50 +2474,70 @@ static void check_max_size(struct inode *inode, loff_t endoff) int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, loff_t endoff, int *got, struct page **pinned_page) { - int _got, check_max, ret, err = 0; + int _got, ret, err = 0; -retry: - if (endoff > 0) - check_max_size(&ci->vfs_inode, endoff); - _got = 0; - check_max = 0; - ret = wait_event_interruptible(ci->i_cap_wq, - try_get_cap_refs(ci, need, want, endoff, - &_got, &check_max, &err)); - if (err) - ret = err; + ret = ceph_pool_perm_check(ci, need); if (ret < 0) return ret; - if (check_max) - goto retry; + while (true) { + if (endoff > 0) + check_max_size(&ci->vfs_inode, endoff); - if (ci->i_inline_version != CEPH_INLINE_NONE && - (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && - i_size_read(&ci->vfs_inode) > 0) { - struct page *page = find_get_page(ci->vfs_inode.i_mapping, 0); - if (page) { - if (PageUptodate(page)) { - *pinned_page = page; - goto out; - } - page_cache_release(page); - } - /* - * drop cap refs first because getattr while holding - * caps refs can cause deadlock. - */ - ceph_put_cap_refs(ci, _got); + err = 0; _got = 0; + ret = try_get_cap_refs(ci, need, want, endoff, + false, &_got, &err); + if (ret) { + if (err == -EAGAIN) + continue; + if (err < 0) + return err; + } else { + ret = wait_event_interruptible(ci->i_cap_wq, + try_get_cap_refs(ci, need, want, endoff, + true, &_got, &err)); + if (err == -EAGAIN) + continue; + if (err < 0) + ret = err; + if (ret < 0) + return ret; + } - /* getattr request will bring inline data into page cache */ - ret = __ceph_do_getattr(&ci->vfs_inode, NULL, - CEPH_STAT_CAP_INLINE_DATA, true); - if (ret < 0) - return ret; - goto retry; + if (ci->i_inline_version != CEPH_INLINE_NONE && + (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && + i_size_read(&ci->vfs_inode) > 0) { + struct page *page = + find_get_page(ci->vfs_inode.i_mapping, 0); + if (page) { + if (PageUptodate(page)) { + *pinned_page = page; + break; + } + page_cache_release(page); + } + /* + * drop cap refs first because getattr while + * holding * caps refs can cause deadlock. + */ + ceph_put_cap_refs(ci, _got); + _got = 0; + + /* + * getattr request will bring inline data into + * page cache + */ + ret = __ceph_do_getattr(&ci->vfs_inode, NULL, + CEPH_STAT_CAP_INLINE_DATA, + true); + if (ret < 0) + return ret; + continue; + } + break; } -out: + *got = _got; return 0; } @@ -2286,10 +2549,31 @@ out: void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps) { spin_lock(&ci->i_ceph_lock); - __take_cap_refs(ci, caps); + __take_cap_refs(ci, caps, false); spin_unlock(&ci->i_ceph_lock); } + +/* + * drop cap_snap that is not associated with any snapshot. + * we don't need to send FLUSHSNAP message for it. + */ +static int ceph_try_drop_cap_snap(struct ceph_cap_snap *capsnap) +{ + if (!capsnap->need_flush && + !capsnap->writing && !capsnap->dirty_pages) { + + dout("dropping cap_snap %p follows %llu\n", + capsnap, capsnap->follows); + ceph_put_snap_context(capsnap->context); + list_del(&capsnap->ci_item); + list_del(&capsnap->flushing_item); + ceph_put_cap_snap(capsnap); + return 1; + } + return 0; +} + /* * Release cap refs. * @@ -2303,7 +2587,6 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) { struct inode *inode = &ci->vfs_inode; int last = 0, put = 0, flushsnaps = 0, wake = 0; - struct ceph_cap_snap *capsnap; spin_lock(&ci->i_ceph_lock); if (had & CEPH_CAP_PIN) @@ -2325,17 +2608,24 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) if (had & CEPH_CAP_FILE_WR) if (--ci->i_wr_ref == 0) { last++; - if (!list_empty(&ci->i_cap_snaps)) { - capsnap = list_first_entry(&ci->i_cap_snaps, - struct ceph_cap_snap, - ci_item); - if (capsnap->writing) { - capsnap->writing = 0; - flushsnaps = - __ceph_finish_cap_snap(ci, - capsnap); - wake = 1; - } + if (__ceph_have_pending_cap_snap(ci)) { + struct ceph_cap_snap *capsnap = + list_last_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, + ci_item); + capsnap->writing = 0; + if (ceph_try_drop_cap_snap(capsnap)) + put++; + else if (__ceph_finish_cap_snap(ci, capsnap)) + flushsnaps = 1; + wake = 1; + } + if (ci->i_wrbuffer_ref_head == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { + BUG_ON(!ci->i_head_snapc); + ceph_put_snap_context(ci->i_head_snapc); + ci->i_head_snapc = NULL; } /* see comment in __ceph_remove_cap() */ if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) @@ -2352,7 +2642,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) ceph_flush_snaps(ci); if (wake) wake_up_all(&ci->i_cap_wq); - if (put) + while (put-- > 0) iput(inode); } @@ -2380,7 +2670,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, if (ci->i_head_snapc == snapc) { ci->i_wrbuffer_ref_head -= nr; if (ci->i_wrbuffer_ref_head == 0 && - ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) { + ci->i_wr_ref == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { BUG_ON(!ci->i_head_snapc); ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; @@ -2401,25 +2693,15 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, capsnap->dirty_pages -= nr; if (capsnap->dirty_pages == 0) { complete_capsnap = 1; - if (capsnap->dirty == 0) - /* cap writeback completed before we created - * the cap_snap; no FLUSHSNAP is needed */ - drop_capsnap = 1; + drop_capsnap = ceph_try_drop_cap_snap(capsnap); } dout("put_wrbuffer_cap_refs on %p cap_snap %p " - " snap %lld %d/%d -> %d/%d %s%s%s\n", + " snap %lld %d/%d -> %d/%d %s%s\n", inode, capsnap, capsnap->context->seq, ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, ci->i_wrbuffer_ref, capsnap->dirty_pages, last ? " (wrbuffer last)" : "", - complete_capsnap ? " (complete capsnap)" : "", - drop_capsnap ? " (drop capsnap)" : ""); - if (drop_capsnap) { - ceph_put_snap_context(capsnap->context); - list_del(&capsnap->ci_item); - list_del(&capsnap->flushing_item); - ceph_put_cap_snap(capsnap); - } + complete_capsnap ? " (complete capsnap)" : ""); } spin_unlock(&ci->i_ceph_lock); @@ -2526,7 +2808,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, * try to invalidate (once). (If there are dirty buffers, we * will invalidate _after_ writeback.) */ - if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && + if (!S_ISDIR(inode->i_mode) && /* don't invalidate readdir cache */ + ((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && !ci->i_wrbuffer_ref) { if (try_nonblocking_invalidate(inode)) { @@ -2732,16 +3015,29 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_cap_flush *cf; + struct rb_node *n; + LIST_HEAD(to_remove); unsigned seq = le32_to_cpu(m->seq); int dirty = le32_to_cpu(m->dirty); int cleaned = 0; int drop = 0; - int i; - for (i = 0; i < CEPH_CAP_BITS; i++) - if ((dirty & (1 << i)) && - (u16)flush_tid == ci->i_cap_flush_tid[i]) - cleaned |= 1 << i; + n = rb_first(&ci->i_cap_flush_tree); + while (n) { + cf = rb_entry(n, struct ceph_cap_flush, i_node); + n = rb_next(&cf->i_node); + if (cf->tid == flush_tid) + cleaned = cf->caps; + if (cf->tid <= flush_tid) { + rb_erase(&cf->i_node, &ci->i_cap_flush_tree); + list_add_tail(&cf->list, &to_remove); + } else { + cleaned &= ~cf->caps; + if (!cleaned) + break; + } + } dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," " flushing %s -> %s\n", @@ -2749,12 +3045,23 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, ceph_cap_string(cleaned), ceph_cap_string(ci->i_flushing_caps), ceph_cap_string(ci->i_flushing_caps & ~cleaned)); - if (ci->i_flushing_caps == (ci->i_flushing_caps & ~cleaned)) + if (list_empty(&to_remove) && !cleaned) goto out; ci->i_flushing_caps &= ~cleaned; spin_lock(&mdsc->cap_dirty_lock); + + if (!list_empty(&to_remove)) { + list_for_each_entry(cf, &to_remove, list) + rb_erase(&cf->g_node, &mdsc->cap_flush_tree); + + n = rb_first(&mdsc->cap_flush_tree); + cf = n ? rb_entry(n, struct ceph_cap_flush, g_node) : NULL; + if (!cf || cf->tid > flush_tid) + wake_up_all(&mdsc->cap_flushing_wq); + } + if (ci->i_flushing_caps == 0) { list_del_init(&ci->i_flushing_item); if (!list_empty(&session->s_cap_flushing)) @@ -2764,14 +3071,14 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, struct ceph_inode_info, i_flushing_item)->vfs_inode); mdsc->num_cap_flushing--; - wake_up_all(&mdsc->cap_flushing_wq); dout(" inode %p now !flushing\n", inode); if (ci->i_dirty_caps == 0) { dout(" inode %p now clean\n", inode); BUG_ON(!list_empty(&ci->i_dirty_item)); drop = 1; - if (ci->i_wrbuffer_ref_head == 0) { + if (ci->i_wr_ref == 0 && + ci->i_wrbuffer_ref_head == 0) { BUG_ON(!ci->i_head_snapc); ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; @@ -2785,6 +3092,13 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, out: spin_unlock(&ci->i_ceph_lock); + + while (!list_empty(&to_remove)) { + cf = list_first_entry(&to_remove, + struct ceph_cap_flush, list); + list_del(&cf->list); + ceph_free_cap_flush(cf); + } if (drop) iput(inode); } @@ -2800,6 +3114,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, struct ceph_mds_session *session) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; u64 follows = le64_to_cpu(m->snap_follows); struct ceph_cap_snap *capsnap; int drop = 0; @@ -2823,6 +3138,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, list_del(&capsnap->ci_item); list_del(&capsnap->flushing_item); ceph_put_cap_snap(capsnap); + wake_up_all(&mdsc->cap_flushing_wq); drop = 1; break; } else { @@ -2971,7 +3287,6 @@ retry: mutex_lock_nested(&session->s_mutex, SINGLE_DEPTH_NESTING); } - ceph_add_cap_releases(mdsc, tsession); new_cap = ceph_get_cap(mdsc, NULL); } else { WARN_ON(1); @@ -3167,16 +3482,20 @@ void ceph_handle_caps(struct ceph_mds_session *session, dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, (unsigned)seq); - if (op == CEPH_CAP_OP_IMPORT) - ceph_add_cap_releases(mdsc, session); - if (!inode) { dout(" i don't have ino %llx\n", vino.ino); if (op == CEPH_CAP_OP_IMPORT) { + cap = ceph_get_cap(mdsc, NULL); + cap->cap_ino = vino.ino; + cap->queue_release = 1; + cap->cap_id = cap_id; + cap->mseq = mseq; + cap->seq = seq; spin_lock(&session->s_cap_lock); - __queue_cap_release(session, vino.ino, cap_id, - mseq, seq); + list_add_tail(&cap->session_caps, + &session->s_cap_releases); + session->s_num_cap_releases++; spin_unlock(&session->s_cap_lock); } goto flush_cap_releases; @@ -3252,11 +3571,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, flush_cap_releases: /* - * send any full release message to try to move things + * send any cap release message to try to move things * along for the mds (who clearly thinks we still have this * cap). */ - ceph_add_cap_releases(mdsc, session); ceph_send_cap_releases(mdsc, session); done: diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 4248307fea90..9314b4ea2375 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -38,7 +38,7 @@ int ceph_init_dentry(struct dentry *dentry) if (dentry->d_fsdata) return 0; - di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); + di = kmem_cache_alloc(ceph_dentry_cachep, GFP_KERNEL | __GFP_ZERO); if (!di) return -ENOMEM; /* oh well */ @@ -107,6 +107,27 @@ static int fpos_cmp(loff_t l, loff_t r) } /* + * make note of the last dentry we read, so we can + * continue at the same lexicographical point, + * regardless of what dir changes take place on the + * server. + */ +static int note_last_dentry(struct ceph_file_info *fi, const char *name, + int len, unsigned next_offset) +{ + char *buf = kmalloc(len+1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + kfree(fi->last_name); + fi->last_name = buf; + memcpy(fi->last_name, name, len); + fi->last_name[len] = 0; + fi->next_offset = next_offset; + dout("note_last_dentry '%s'\n", fi->last_name); + return 0; +} + +/* * When possible, we try to satisfy a readdir by peeking at the * dcache. We make this work by carefully ordering dentries on * d_child when we initially get results back from the MDS, and @@ -123,123 +144,113 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, struct ceph_file_info *fi = file->private_data; struct dentry *parent = file->f_path.dentry; struct inode *dir = d_inode(parent); - struct list_head *p; - struct dentry *dentry, *last; + struct dentry *dentry, *last = NULL; struct ceph_dentry_info *di; + unsigned nsize = PAGE_CACHE_SIZE / sizeof(struct dentry *); int err = 0; + loff_t ptr_pos = 0; + struct ceph_readdir_cache_control cache_ctl = {}; - /* claim ref on last dentry we returned */ - last = fi->dentry; - fi->dentry = NULL; - - dout("__dcache_readdir %p v%u at %llu (last %p)\n", - dir, shared_gen, ctx->pos, last); + dout("__dcache_readdir %p v%u at %llu\n", dir, shared_gen, ctx->pos); - spin_lock(&parent->d_lock); - - /* start at beginning? */ - if (ctx->pos == 2 || last == NULL || - fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) { - if (list_empty(&parent->d_subdirs)) - goto out_unlock; - p = parent->d_subdirs.prev; - dout(" initial p %p/%p\n", p->prev, p->next); - } else { - p = last->d_child.prev; + /* we can calculate cache index for the first dirfrag */ + if (ceph_frag_is_leftmost(fpos_frag(ctx->pos))) { + cache_ctl.index = fpos_off(ctx->pos) - 2; + BUG_ON(cache_ctl.index < 0); + ptr_pos = cache_ctl.index * sizeof(struct dentry *); } -more: - dentry = list_entry(p, struct dentry, d_child); - di = ceph_dentry(dentry); - while (1) { - dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next, - d_unhashed(dentry) ? "!hashed" : "hashed", - parent->d_subdirs.prev, parent->d_subdirs.next); - if (p == &parent->d_subdirs) { + while (true) { + pgoff_t pgoff; + bool emit_dentry; + + if (ptr_pos >= i_size_read(dir)) { fi->flags |= CEPH_F_ATEND; - goto out_unlock; + err = 0; + break; + } + + err = -EAGAIN; + pgoff = ptr_pos >> PAGE_CACHE_SHIFT; + if (!cache_ctl.page || pgoff != page_index(cache_ctl.page)) { + ceph_readdir_cache_release(&cache_ctl); + cache_ctl.page = find_lock_page(&dir->i_data, pgoff); + if (!cache_ctl.page) { + dout(" page %lu not found\n", pgoff); + break; + } + /* reading/filling the cache are serialized by + * i_mutex, no need to use page lock */ + unlock_page(cache_ctl.page); + cache_ctl.dentries = kmap(cache_ctl.page); } - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + + rcu_read_lock(); + spin_lock(&parent->d_lock); + /* check i_size again here, because empty directory can be + * marked as complete while not holding the i_mutex. */ + if (ceph_dir_is_complete_ordered(dir) && + ptr_pos < i_size_read(dir)) + dentry = cache_ctl.dentries[cache_ctl.index % nsize]; + else + dentry = NULL; + spin_unlock(&parent->d_lock); + if (dentry && !lockref_get_not_dead(&dentry->d_lockref)) + dentry = NULL; + rcu_read_unlock(); + if (!dentry) + break; + + emit_dentry = false; + di = ceph_dentry(dentry); + spin_lock(&dentry->d_lock); if (di->lease_shared_gen == shared_gen && - !d_unhashed(dentry) && d_really_is_positive(dentry) && + d_really_is_positive(dentry) && ceph_snap(d_inode(dentry)) != CEPH_SNAPDIR && ceph_ino(d_inode(dentry)) != CEPH_INO_CEPH && - fpos_cmp(ctx->pos, di->offset) <= 0) - break; - dout(" skipping %p %pd at %llu (%llu)%s%s\n", dentry, - dentry, di->offset, - ctx->pos, d_unhashed(dentry) ? " unhashed" : "", - !d_inode(dentry) ? " null" : ""); + fpos_cmp(ctx->pos, di->offset) <= 0) { + emit_dentry = true; + } spin_unlock(&dentry->d_lock); - p = p->prev; - dentry = list_entry(p, struct dentry, d_child); - di = ceph_dentry(dentry); - } - - dget_dlock(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&parent->d_lock); - /* make sure a dentry wasn't dropped while we didn't have parent lock */ - if (!ceph_dir_is_complete_ordered(dir)) { - dout(" lost dir complete on %p; falling back to mds\n", dir); - dput(dentry); - err = -EAGAIN; - goto out; - } + if (emit_dentry) { + dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos, + dentry, dentry, d_inode(dentry)); + ctx->pos = di->offset; + if (!dir_emit(ctx, dentry->d_name.name, + dentry->d_name.len, + ceph_translate_ino(dentry->d_sb, + d_inode(dentry)->i_ino), + d_inode(dentry)->i_mode >> 12)) { + dput(dentry); + err = 0; + break; + } + ctx->pos++; - dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos, - dentry, dentry, d_inode(dentry)); - if (!dir_emit(ctx, dentry->d_name.name, - dentry->d_name.len, - ceph_translate_ino(dentry->d_sb, d_inode(dentry)->i_ino), - d_inode(dentry)->i_mode >> 12)) { - if (last) { - /* remember our position */ - fi->dentry = last; - fi->next_offset = fpos_off(di->offset); + if (last) + dput(last); + last = dentry; + } else { + dput(dentry); } - dput(dentry); - return 0; - } - - ctx->pos = di->offset + 1; - if (last) - dput(last); - last = dentry; - - spin_lock(&parent->d_lock); - p = p->prev; /* advance to next dentry */ - goto more; - -out_unlock: - spin_unlock(&parent->d_lock); -out: - if (last) + cache_ctl.index++; + ptr_pos += sizeof(struct dentry *); + } + ceph_readdir_cache_release(&cache_ctl); + if (last) { + int ret; + di = ceph_dentry(last); + ret = note_last_dentry(fi, last->d_name.name, last->d_name.len, + fpos_off(di->offset) + 1); + if (ret < 0) + err = ret; dput(last); + } return err; } -/* - * make note of the last dentry we read, so we can - * continue at the same lexicographical point, - * regardless of what dir changes take place on the - * server. - */ -static int note_last_dentry(struct ceph_file_info *fi, const char *name, - int len) -{ - kfree(fi->last_name); - fi->last_name = kmalloc(len+1, GFP_NOFS); - if (!fi->last_name) - return -ENOMEM; - memcpy(fi->last_name, name, len); - fi->last_name[len] = 0; - dout("note_last_dentry '%s'\n", fi->last_name); - return 0; -} - static int ceph_readdir(struct file *file, struct dir_context *ctx) { struct ceph_file_info *fi = file->private_data; @@ -280,8 +291,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) /* can we use the dcache? */ spin_lock(&ci->i_ceph_lock); - if ((ctx->pos == 2 || fi->dentry) && - ceph_test_mount_opt(fsc, DCACHE) && + if (ceph_test_mount_opt(fsc, DCACHE) && !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && ceph_snap(inode) != CEPH_SNAPDIR && __ceph_dir_is_complete_ordered(ci) && @@ -296,24 +306,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) } else { spin_unlock(&ci->i_ceph_lock); } - if (fi->dentry) { - err = note_last_dentry(fi, fi->dentry->d_name.name, - fi->dentry->d_name.len); - if (err) - return err; - dput(fi->dentry); - fi->dentry = NULL; - } /* proceed with a normal readdir */ - - if (ctx->pos == 2) { - /* note dir version at start of readdir so we can tell - * if any dentries get dropped */ - fi->dir_release_count = atomic_read(&ci->i_release_count); - fi->dir_ordered_count = ci->i_ordered_count; - } - more: /* do we have the correct frag content buffered? */ if (fi->frag != frag || fi->last_readdir == NULL) { @@ -342,12 +336,15 @@ more: req->r_direct_hash = ceph_frag_value(frag); req->r_direct_is_hash = true; if (fi->last_name) { - req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); + req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL); if (!req->r_path2) { ceph_mdsc_put_request(req); return -ENOMEM; } } + req->r_dir_release_cnt = fi->dir_release_count; + req->r_dir_ordered_cnt = fi->dir_ordered_count; + req->r_readdir_cache_idx = fi->readdir_cache_idx; req->r_readdir_offset = fi->next_offset; req->r_args.readdir.frag = cpu_to_le32(frag); @@ -364,26 +361,38 @@ more: (int)req->r_reply_info.dir_end, (int)req->r_reply_info.dir_complete); - if (!req->r_did_prepopulate) { - dout("readdir !did_prepopulate"); - /* preclude from marking dir complete */ - fi->dir_release_count--; - } /* note next offset and last dentry name */ rinfo = &req->r_reply_info; if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { frag = le32_to_cpu(rinfo->dir_dir->frag); - if (ceph_frag_is_leftmost(frag)) - fi->next_offset = 2; - else - fi->next_offset = 0; - off = fi->next_offset; + off = req->r_readdir_offset; + fi->next_offset = off; } + fi->frag = frag; fi->offset = fi->next_offset; fi->last_readdir = req; + if (req->r_did_prepopulate) { + fi->readdir_cache_idx = req->r_readdir_cache_idx; + if (fi->readdir_cache_idx < 0) { + /* preclude from marking dir ordered */ + fi->dir_ordered_count = 0; + } else if (ceph_frag_is_leftmost(frag) && off == 2) { + /* note dir version at start of readdir so + * we can tell if any dentries get dropped */ + fi->dir_release_count = req->r_dir_release_cnt; + fi->dir_ordered_count = req->r_dir_ordered_cnt; + } + } else { + dout("readdir !did_prepopulate"); + /* disable readdir cache */ + fi->readdir_cache_idx = -1; + /* preclude from marking dir complete */ + fi->dir_release_count = 0; + } + if (req->r_reply_info.dir_end) { kfree(fi->last_name); fi->last_name = NULL; @@ -394,10 +403,10 @@ more: } else { err = note_last_dentry(fi, rinfo->dir_dname[rinfo->dir_nr-1], - rinfo->dir_dname_len[rinfo->dir_nr-1]); + rinfo->dir_dname_len[rinfo->dir_nr-1], + fi->next_offset + rinfo->dir_nr); if (err) return err; - fi->next_offset += rinfo->dir_nr; } } @@ -453,16 +462,22 @@ more: * were released during the whole readdir, and we should have * the complete dir contents in our cache. */ - spin_lock(&ci->i_ceph_lock); - if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { - if (ci->i_ordered_count == fi->dir_ordered_count) + if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) { + spin_lock(&ci->i_ceph_lock); + if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) { dout(" marking %p complete and ordered\n", inode); - else + /* use i_size to track number of entries in + * readdir cache */ + BUG_ON(fi->readdir_cache_idx < 0); + i_size_write(inode, fi->readdir_cache_idx * + sizeof(struct dentry*)); + } else { dout(" marking %p complete\n", inode); + } __ceph_dir_set_complete(ci, fi->dir_release_count, fi->dir_ordered_count); + spin_unlock(&ci->i_ceph_lock); } - spin_unlock(&ci->i_ceph_lock); dout("readdir %p file %p done.\n", inode, file); return 0; @@ -476,14 +491,12 @@ static void reset_readdir(struct ceph_file_info *fi, unsigned frag) } kfree(fi->last_name); fi->last_name = NULL; + fi->dir_release_count = 0; + fi->readdir_cache_idx = -1; if (ceph_frag_is_leftmost(frag)) fi->next_offset = 2; /* compensate for . and .. */ else fi->next_offset = 0; - if (fi->dentry) { - dput(fi->dentry); - fi->dentry = NULL; - } fi->flags &= ~CEPH_F_ATEND; } @@ -497,13 +510,12 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) mutex_lock(&inode->i_mutex); retval = -EINVAL; switch (whence) { - case SEEK_END: - offset += inode->i_size + 2; /* FIXME */ - break; case SEEK_CUR: offset += file->f_pos; case SEEK_SET: break; + case SEEK_END: + retval = -EOPNOTSUPP; default: goto out; } @@ -516,20 +528,18 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) } retval = offset; - /* - * discard buffered readdir content on seekdir(0), or - * seek to new frag, or seek prior to current chunk. - */ if (offset == 0 || fpos_frag(offset) != fi->frag || fpos_off(offset) < fi->offset) { + /* discard buffered readdir content on seekdir(0), or + * seek to new frag, or seek prior to current chunk */ dout("dir_llseek dropping %p content\n", file); reset_readdir(fi, fpos_frag(offset)); + } else if (fpos_cmp(offset, old_offset) > 0) { + /* reset dir_release_count if we did a forward seek */ + fi->dir_release_count = 0; + fi->readdir_cache_idx = -1; } - - /* bump dir_release_count if we did a forward seek */ - if (fpos_cmp(offset, old_offset) > 0) - fi->dir_release_count--; } out: mutex_unlock(&inode->i_mutex); @@ -764,7 +774,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, err = PTR_ERR(req); goto out; } - req->r_path2 = kstrdup(dest, GFP_NOFS); + req->r_path2 = kstrdup(dest, GFP_KERNEL); if (!req->r_path2) { err = -ENOMEM; ceph_mdsc_put_request(req); @@ -985,16 +995,15 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, * to do it here. */ + /* d_move screws up sibling dentries' offsets */ + ceph_dir_clear_complete(old_dir); + ceph_dir_clear_complete(new_dir); + d_move(old_dentry, new_dentry); /* ensure target dentry is invalidated, despite rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(new_dentry); - - /* d_move screws up sibling dentries' offsets */ - ceph_dir_clear_complete(old_dir); - ceph_dir_clear_complete(new_dir); - } ceph_mdsc_put_request(req); return err; @@ -1189,7 +1198,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, return -EISDIR; if (!cf->dir_info) { - cf->dir_info = kmalloc(bufsize, GFP_NOFS); + cf->dir_info = kmalloc(bufsize, GFP_KERNEL); if (!cf->dir_info) return -ENOMEM; cf->dir_info_len = @@ -1224,66 +1233,6 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, } /* - * an fsync() on a dir will wait for any uncommitted directory - * operations to commit. - */ -static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, - int datasync) -{ - struct inode *inode = file_inode(file); - struct ceph_inode_info *ci = ceph_inode(inode); - struct list_head *head = &ci->i_unsafe_dirops; - struct ceph_mds_request *req; - u64 last_tid; - int ret = 0; - - dout("dir_fsync %p\n", inode); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret) - return ret; - mutex_lock(&inode->i_mutex); - - spin_lock(&ci->i_unsafe_lock); - if (list_empty(head)) - goto out; - - req = list_entry(head->prev, - struct ceph_mds_request, r_unsafe_dir_item); - last_tid = req->r_tid; - - do { - ceph_mdsc_get_request(req); - spin_unlock(&ci->i_unsafe_lock); - - dout("dir_fsync %p wait on tid %llu (until %llu)\n", - inode, req->r_tid, last_tid); - if (req->r_timeout) { - unsigned long time_left = wait_for_completion_timeout( - &req->r_safe_completion, - req->r_timeout); - if (time_left > 0) - ret = 0; - else - ret = -EIO; /* timed out */ - } else { - wait_for_completion(&req->r_safe_completion); - } - ceph_mdsc_put_request(req); - - spin_lock(&ci->i_unsafe_lock); - if (ret || list_empty(head)) - break; - req = list_entry(head->next, - struct ceph_mds_request, r_unsafe_dir_item); - } while (req->r_tid < last_tid); -out: - spin_unlock(&ci->i_unsafe_lock); - mutex_unlock(&inode->i_mutex); - - return ret; -} - -/* * We maintain a private dentry LRU. * * FIXME: this needs to be changed to a per-mds lru to be useful. @@ -1353,7 +1302,7 @@ const struct file_operations ceph_dir_fops = { .open = ceph_open, .release = ceph_release, .unlocked_ioctl = ceph_ioctl, - .fsync = ceph_dir_fsync, + .fsync = ceph_fsync, }; const struct file_operations ceph_snapdir_fops = { diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3b6b522b4b31..faf92095e105 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -89,13 +89,14 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) case S_IFDIR: dout("init_file %p %p 0%o (regular)\n", inode, file, inode->i_mode); - cf = kmem_cache_alloc(ceph_file_cachep, GFP_NOFS | __GFP_ZERO); + cf = kmem_cache_alloc(ceph_file_cachep, GFP_KERNEL | __GFP_ZERO); if (cf == NULL) { ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ return -ENOMEM; } cf->fmode = fmode; cf->next_offset = 2; + cf->readdir_cache_idx = -1; file->private_data = cf; BUG_ON(inode->i_fop->release != ceph_release); break; @@ -324,7 +325,6 @@ int ceph_release(struct inode *inode, struct file *file) ceph_mdsc_put_request(cf->last_readdir); kfree(cf->last_name); kfree(cf->dir_info); - dput(cf->dentry); kmem_cache_free(ceph_file_cachep, cf); /* wake up anyone waiting for caps on this inode */ @@ -483,7 +483,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, } } else { num_pages = calc_pages_for(off, len); - pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); if (IS_ERR(pages)) return PTR_ERR(pages); ret = striped_read(inode, off, len, pages, @@ -557,13 +557,13 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe) * objects, rollback on failure, etc.) */ static ssize_t -ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) +ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, + struct ceph_snap_context *snapc) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_snap_context *snapc; struct ceph_vino vino; struct ceph_osd_request *req; struct page **pages; @@ -600,7 +600,6 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) size_t start; ssize_t n; - snapc = ci->i_snap_realm->cached_context; vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, pos, &len, 0, @@ -614,7 +613,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) break; } - osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); + osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0); n = iov_iter_get_pages_alloc(from, &pages, len, &start); if (unlikely(n < 0)) { @@ -674,13 +673,13 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) * objects, rollback on failure, etc.) */ static ssize_t -ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) +ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, + struct ceph_snap_context *snapc) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_snap_context *snapc; struct ceph_vino vino; struct ceph_osd_request *req; struct page **pages; @@ -717,7 +716,6 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) size_t left; int n; - snapc = ci->i_snap_realm->cached_context; vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, pos, &len, 0, 1, @@ -736,7 +734,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) */ num_pages = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto out; @@ -860,7 +858,7 @@ again: struct page *page = NULL; loff_t i_size; if (retry_op == READ_INLINE) { - page = __page_cache_alloc(GFP_NOFS); + page = __page_cache_alloc(GFP_KERNEL); if (!page) return -ENOMEM; } @@ -941,6 +939,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->client->osdc; + struct ceph_cap_flush *prealloc_cf; ssize_t count, written = 0; int err, want, got; loff_t pos; @@ -948,6 +947,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return -ENOMEM; + mutex_lock(&inode->i_mutex); /* We can write back this queue in page reclaim */ @@ -996,14 +999,30 @@ retry_snap: if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { + struct ceph_snap_context *snapc; struct iov_iter data; mutex_unlock(&inode->i_mutex); + + spin_lock(&ci->i_ceph_lock); + if (__ceph_have_pending_cap_snap(ci)) { + struct ceph_cap_snap *capsnap = + list_last_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, + ci_item); + snapc = ceph_get_snap_context(capsnap->context); + } else { + BUG_ON(!ci->i_head_snapc); + snapc = ceph_get_snap_context(ci->i_head_snapc); + } + spin_unlock(&ci->i_ceph_lock); + /* we might need to revert back to that point */ data = *from; if (iocb->ki_flags & IOCB_DIRECT) - written = ceph_sync_direct_write(iocb, &data, pos); + written = ceph_sync_direct_write(iocb, &data, pos, + snapc); else - written = ceph_sync_write(iocb, &data, pos); + written = ceph_sync_write(iocb, &data, pos, snapc); if (written == -EOLDSNAPC) { dout("aio_write %p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n", @@ -1014,6 +1033,7 @@ retry_snap: } if (written > 0) iov_iter_advance(from, written); + ceph_put_snap_context(snapc); } else { loff_t old_size = inode->i_size; /* @@ -1035,7 +1055,8 @@ retry_snap: int dirty; spin_lock(&ci->i_ceph_lock); ci->i_inline_version = CEPH_INLINE_NONE; - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, + &prealloc_cf); spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); @@ -1059,6 +1080,7 @@ retry_snap: out: mutex_unlock(&inode->i_mutex); out_unlocked: + ceph_free_cap_flush(prealloc_cf); current->backing_dev_info = NULL; return written ? written : err; } @@ -1255,6 +1277,7 @@ static long ceph_fallocate(struct file *file, int mode, struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->client->osdc; + struct ceph_cap_flush *prealloc_cf; int want, got = 0; int dirty; int ret = 0; @@ -1267,6 +1290,10 @@ static long ceph_fallocate(struct file *file, int mode, if (!S_ISREG(inode->i_mode)) return -EOPNOTSUPP; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return -ENOMEM; + mutex_lock(&inode->i_mutex); if (ceph_snap(inode) != CEPH_NOSNAP) { @@ -1313,7 +1340,8 @@ static long ceph_fallocate(struct file *file, int mode, if (!ret) { spin_lock(&ci->i_ceph_lock); ci->i_inline_version = CEPH_INLINE_NONE; - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, + &prealloc_cf); spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); @@ -1322,6 +1350,7 @@ static long ceph_fallocate(struct file *file, int mode, ceph_put_cap_refs(ci, got); unlock: mutex_unlock(&inode->i_mutex); + ceph_free_cap_flush(prealloc_cf); return ret; } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 571acd88606c..96d2bd829902 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -389,9 +389,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_inline_version = 0; ci->i_time_warp_seq = 0; ci->i_ceph_flags = 0; - ci->i_ordered_count = 0; - atomic_set(&ci->i_release_count, 1); - atomic_set(&ci->i_complete_count, 0); + atomic64_set(&ci->i_ordered_count, 1); + atomic64_set(&ci->i_release_count, 1); + atomic64_set(&ci->i_complete_seq[0], 0); + atomic64_set(&ci->i_complete_seq[1], 0); ci->i_symlink = NULL; memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); @@ -415,9 +416,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_flushing_caps = 0; INIT_LIST_HEAD(&ci->i_dirty_item); INIT_LIST_HEAD(&ci->i_flushing_item); - ci->i_cap_flush_seq = 0; - ci->i_cap_flush_last_tid = 0; - memset(&ci->i_cap_flush_tid, 0, sizeof(ci->i_cap_flush_tid)); + ci->i_prealloc_cap_flush = NULL; + ci->i_cap_flush_tree = RB_ROOT; init_waitqueue_head(&ci->i_cap_wq); ci->i_hold_caps_min = 0; ci->i_hold_caps_max = 0; @@ -752,7 +752,10 @@ static int fill_inode(struct inode *inode, struct page *locked_page, if (new_version || (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { + if (ci->i_layout.fl_pg_pool != info->layout.fl_pg_pool) + ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; ci->i_layout = info->layout; + queue_trunc = ceph_fill_file_size(inode, issued, le32_to_cpu(info->truncate_seq), le64_to_cpu(info->truncate_size), @@ -858,9 +861,10 @@ static int fill_inode(struct inode *inode, struct page *locked_page, (issued & CEPH_CAP_FILE_EXCL) == 0 && !__ceph_dir_is_complete(ci)) { dout(" marking %p complete (empty)\n", inode); + i_size_write(inode, 0); __ceph_dir_set_complete(ci, - atomic_read(&ci->i_release_count), - ci->i_ordered_count); + atomic64_read(&ci->i_release_count), + atomic64_read(&ci->i_ordered_count)); } wake = true; @@ -1212,6 +1216,10 @@ retry_lookup: dout("fill_trace doing d_move %p -> %p\n", req->r_old_dentry, dn); + /* d_move screws up sibling dentries' offsets */ + ceph_dir_clear_ordered(dir); + ceph_dir_clear_ordered(olddir); + d_move(req->r_old_dentry, dn); dout(" src %p '%pd' dst %p '%pd'\n", req->r_old_dentry, @@ -1222,10 +1230,6 @@ retry_lookup: rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(dn); - /* d_move screws up sibling dentries' offsets */ - ceph_dir_clear_ordered(dir); - ceph_dir_clear_ordered(olddir); - dout("dn %p gets new offset %lld\n", req->r_old_dentry, ceph_dentry(req->r_old_dentry)->offset); @@ -1333,6 +1337,49 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, return err; } +void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl) +{ + if (ctl->page) { + kunmap(ctl->page); + page_cache_release(ctl->page); + ctl->page = NULL; + } +} + +static int fill_readdir_cache(struct inode *dir, struct dentry *dn, + struct ceph_readdir_cache_control *ctl, + struct ceph_mds_request *req) +{ + struct ceph_inode_info *ci = ceph_inode(dir); + unsigned nsize = PAGE_CACHE_SIZE / sizeof(struct dentry*); + unsigned idx = ctl->index % nsize; + pgoff_t pgoff = ctl->index / nsize; + + if (!ctl->page || pgoff != page_index(ctl->page)) { + ceph_readdir_cache_release(ctl); + ctl->page = grab_cache_page(&dir->i_data, pgoff); + if (!ctl->page) { + ctl->index = -1; + return -ENOMEM; + } + /* reading/filling the cache are serialized by + * i_mutex, no need to use page lock */ + unlock_page(ctl->page); + ctl->dentries = kmap(ctl->page); + } + + if (req->r_dir_release_cnt == atomic64_read(&ci->i_release_count) && + req->r_dir_ordered_cnt == atomic64_read(&ci->i_ordered_count)) { + dout("readdir cache dn %p idx %d\n", dn, ctl->index); + ctl->dentries[idx] = dn; + ctl->index++; + } else { + dout("disable readdir cache\n"); + ctl->index = -1; + } + return 0; +} + int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct ceph_mds_session *session) { @@ -1345,8 +1392,11 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct inode *snapdir = NULL; struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; struct ceph_dentry_info *di; - u64 r_readdir_offset = req->r_readdir_offset; u32 frag = le32_to_cpu(rhead->args.readdir.frag); + struct ceph_readdir_cache_control cache_ctl = {}; + + if (req->r_aborted) + return readdir_prepopulate_inodes_only(req, session); if (rinfo->dir_dir && le32_to_cpu(rinfo->dir_dir->frag) != frag) { @@ -1354,14 +1404,11 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, frag, le32_to_cpu(rinfo->dir_dir->frag)); frag = le32_to_cpu(rinfo->dir_dir->frag); if (ceph_frag_is_leftmost(frag)) - r_readdir_offset = 2; + req->r_readdir_offset = 2; else - r_readdir_offset = 0; + req->r_readdir_offset = 0; } - if (req->r_aborted) - return readdir_prepopulate_inodes_only(req, session); - if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) { snapdir = ceph_get_snapdir(d_inode(parent)); parent = d_find_alias(snapdir); @@ -1374,6 +1421,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir); } + if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2) { + /* note dir version at start of readdir so we can tell + * if any dentries get dropped */ + struct ceph_inode_info *ci = ceph_inode(d_inode(parent)); + req->r_dir_release_cnt = atomic64_read(&ci->i_release_count); + req->r_dir_ordered_cnt = atomic64_read(&ci->i_ordered_count); + req->r_readdir_cache_idx = 0; + } + + cache_ctl.index = req->r_readdir_cache_idx; + /* FIXME: release caps/leases if error occurs */ for (i = 0; i < rinfo->dir_nr; i++) { struct ceph_vino vino; @@ -1413,13 +1471,6 @@ retry_lookup: d_delete(dn); dput(dn); goto retry_lookup; - } else { - /* reorder parent's d_subdirs */ - spin_lock(&parent->d_lock); - spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&dn->d_child, &parent->d_subdirs); - spin_unlock(&dn->d_lock); - spin_unlock(&parent->d_lock); } /* inode */ @@ -1436,13 +1487,15 @@ retry_lookup: } } - if (fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session, - req->r_request_started, -1, - &req->r_caps_reservation) < 0) { + ret = fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session, + req->r_request_started, -1, + &req->r_caps_reservation); + if (ret < 0) { pr_err("fill_inode badness on %p\n", in); if (d_really_is_negative(dn)) iput(in); d_drop(dn); + err = ret; goto next_item; } @@ -1458,19 +1511,28 @@ retry_lookup: } di = dn->d_fsdata; - di->offset = ceph_make_fpos(frag, i + r_readdir_offset); + di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset); update_dentry_lease(dn, rinfo->dir_dlease[i], req->r_session, req->r_request_started); + + if (err == 0 && cache_ctl.index >= 0) { + ret = fill_readdir_cache(d_inode(parent), dn, + &cache_ctl, req); + if (ret < 0) + err = ret; + } next_item: if (dn) dput(dn); } - if (err == 0) - req->r_did_prepopulate = true; - out: + if (err == 0) { + req->r_did_prepopulate = true; + req->r_readdir_cache_idx = cache_ctl.index; + } + ceph_readdir_cache_release(&cache_ctl); if (snapdir) { iput(snapdir); dput(parent); @@ -1712,11 +1774,13 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) const unsigned int ia_valid = attr->ia_valid; struct ceph_mds_request *req; struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; + struct ceph_cap_flush *prealloc_cf; int issued; int release = 0, dirtied = 0; int mask = 0; int err = 0; int inode_dirty_flags = 0; + bool lock_snap_rwsem = false; if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; @@ -1725,13 +1789,31 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) if (err != 0) return err; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return -ENOMEM; + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR, USE_AUTH_MDS); - if (IS_ERR(req)) + if (IS_ERR(req)) { + ceph_free_cap_flush(prealloc_cf); return PTR_ERR(req); + } spin_lock(&ci->i_ceph_lock); issued = __ceph_caps_issued(ci, NULL); + + if (!ci->i_head_snapc && + (issued & (CEPH_CAP_ANY_EXCL | CEPH_CAP_FILE_WR))) { + lock_snap_rwsem = true; + if (!down_read_trylock(&mdsc->snap_rwsem)) { + spin_unlock(&ci->i_ceph_lock); + down_read(&mdsc->snap_rwsem); + spin_lock(&ci->i_ceph_lock); + issued = __ceph_caps_issued(ci, NULL); + } + } + dout("setattr %p issued %s\n", inode, ceph_cap_string(issued)); if (ia_valid & ATTR_UID) { @@ -1874,12 +1956,15 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) dout("setattr %p ATTR_FILE ... hrm!\n", inode); if (dirtied) { - inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied); + inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied, + &prealloc_cf); inode->i_ctime = CURRENT_TIME; } release &= issued; spin_unlock(&ci->i_ceph_lock); + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); if (inode_dirty_flags) __mark_inode_dirty(inode, inode_dirty_flags); @@ -1904,9 +1989,11 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) ceph_mdsc_put_request(req); if (mask & CEPH_SETATTR_SIZE) __ceph_do_pending_vmtruncate(inode); + ceph_free_cap_flush(prealloc_cf); return err; out_put: ceph_mdsc_put_request(req); + ceph_free_cap_flush(prealloc_cf); return err; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 84f37f34f9aa..6aa07af67603 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -8,6 +8,7 @@ #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/utsname.h> +#include <linux/ratelimit.h> #include "super.h" #include "mds_client.h" @@ -458,7 +459,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_cap_reconnect = 0; s->s_cap_iterator = NULL; INIT_LIST_HEAD(&s->s_cap_releases); - INIT_LIST_HEAD(&s->s_cap_releases_done); INIT_LIST_HEAD(&s->s_cap_flushing); INIT_LIST_HEAD(&s->s_cap_snaps_flushing); @@ -629,6 +629,9 @@ static void __register_request(struct ceph_mds_client *mdsc, req->r_uid = current_fsuid(); req->r_gid = current_fsgid(); + if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK) + mdsc->oldest_tid = req->r_tid; + if (dir) { struct ceph_inode_info *ci = ceph_inode(dir); @@ -644,6 +647,21 @@ static void __unregister_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { dout("__unregister_request %p tid %lld\n", req, req->r_tid); + + if (req->r_tid == mdsc->oldest_tid) { + struct rb_node *p = rb_next(&req->r_node); + mdsc->oldest_tid = 0; + while (p) { + struct ceph_mds_request *next_req = + rb_entry(p, struct ceph_mds_request, r_node); + if (next_req->r_op != CEPH_MDS_OP_SETFILELOCK) { + mdsc->oldest_tid = next_req->r_tid; + break; + } + p = rb_next(p); + } + } + rb_erase(&req->r_node, &mdsc->request_tree); RB_CLEAR_NODE(&req->r_node); @@ -998,27 +1016,25 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, * session caps */ -/* - * Free preallocated cap messages assigned to this session - */ -static void cleanup_cap_releases(struct ceph_mds_session *session) +/* caller holds s_cap_lock, we drop it */ +static void cleanup_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session) + __releases(session->s_cap_lock) { - struct ceph_msg *msg; + LIST_HEAD(tmp_list); + list_splice_init(&session->s_cap_releases, &tmp_list); + session->s_num_cap_releases = 0; + spin_unlock(&session->s_cap_lock); - spin_lock(&session->s_cap_lock); - while (!list_empty(&session->s_cap_releases)) { - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, list_head); - list_del_init(&msg->list_head); - ceph_msg_put(msg); - } - while (!list_empty(&session->s_cap_releases_done)) { - msg = list_first_entry(&session->s_cap_releases_done, - struct ceph_msg, list_head); - list_del_init(&msg->list_head); - ceph_msg_put(msg); + dout("cleanup_cap_releases mds%d\n", session->s_mds); + while (!list_empty(&tmp_list)) { + struct ceph_cap *cap; + /* zero out the in-progress message */ + cap = list_first_entry(&tmp_list, + struct ceph_cap, session_caps); + list_del(&cap->session_caps); + ceph_put_cap(mdsc, cap); } - spin_unlock(&session->s_cap_lock); } static void cleanup_session_requests(struct ceph_mds_client *mdsc, @@ -1033,7 +1049,8 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, req = list_first_entry(&session->s_unsafe, struct ceph_mds_request, r_unsafe_item); list_del_init(&req->r_unsafe_item); - pr_info(" dropping unsafe request %llu\n", req->r_tid); + pr_warn_ratelimited(" dropping unsafe request %llu\n", + req->r_tid); __unregister_request(mdsc, req); } /* zero r_attempts, so kick_requests() will re-send requests */ @@ -1095,10 +1112,16 @@ static int iterate_session_caps(struct ceph_mds_session *session, dout("iterate_session_caps finishing cap %p removal\n", cap); BUG_ON(cap->session != session); + cap->session = NULL; list_del_init(&cap->session_caps); session->s_nr_caps--; - cap->session = NULL; - old_cap = cap; /* put_cap it w/o locks held */ + if (cap->queue_release) { + list_add_tail(&cap->session_caps, + &session->s_cap_releases); + session->s_num_cap_releases++; + } else { + old_cap = cap; /* put_cap it w/o locks held */ + } } if (ret < 0) goto out; @@ -1119,6 +1142,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) { struct ceph_inode_info *ci = ceph_inode(inode); + LIST_HEAD(to_remove); int drop = 0; dout("removing cap %p, ci is %p, inode is %p\n", @@ -1126,12 +1150,27 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, spin_lock(&ci->i_ceph_lock); __ceph_remove_cap(cap, false); if (!ci->i_auth_cap) { + struct ceph_cap_flush *cf; struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + while (true) { + struct rb_node *n = rb_first(&ci->i_cap_flush_tree); + if (!n) + break; + cf = rb_entry(n, struct ceph_cap_flush, i_node); + rb_erase(&cf->i_node, &ci->i_cap_flush_tree); + list_add(&cf->list, &to_remove); + } + spin_lock(&mdsc->cap_dirty_lock); + + list_for_each_entry(cf, &to_remove, list) + rb_erase(&cf->g_node, &mdsc->cap_flush_tree); + if (!list_empty(&ci->i_dirty_item)) { - pr_info(" dropping dirty %s state for %p %lld\n", + pr_warn_ratelimited( + " dropping dirty %s state for %p %lld\n", ceph_cap_string(ci->i_dirty_caps), inode, ceph_ino(inode)); ci->i_dirty_caps = 0; @@ -1139,7 +1178,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, drop = 1; } if (!list_empty(&ci->i_flushing_item)) { - pr_info(" dropping dirty+flushing %s state for %p %lld\n", + pr_warn_ratelimited( + " dropping dirty+flushing %s state for %p %lld\n", ceph_cap_string(ci->i_flushing_caps), inode, ceph_ino(inode)); ci->i_flushing_caps = 0; @@ -1148,8 +1188,20 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, drop = 1; } spin_unlock(&mdsc->cap_dirty_lock); + + if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) { + list_add(&ci->i_prealloc_cap_flush->list, &to_remove); + ci->i_prealloc_cap_flush = NULL; + } } spin_unlock(&ci->i_ceph_lock); + while (!list_empty(&to_remove)) { + struct ceph_cap_flush *cf; + cf = list_first_entry(&to_remove, + struct ceph_cap_flush, list); + list_del(&cf->list); + ceph_free_cap_flush(cf); + } while (drop--) iput(inode); return 0; @@ -1191,11 +1243,12 @@ static void remove_session_caps(struct ceph_mds_session *session) spin_lock(&session->s_cap_lock); } } - spin_unlock(&session->s_cap_lock); + + // drop cap expires and unlock s_cap_lock + cleanup_cap_releases(session->s_mdsc, session); BUG_ON(session->s_nr_caps > 0); BUG_ON(!list_empty(&session->s_cap_flushing)); - cleanup_cap_releases(session); } /* @@ -1371,7 +1424,8 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued), ceph_cap_string(used), ceph_cap_string(wanted)); if (cap == ci->i_auth_cap) { - if (ci->i_dirty_caps | ci->i_flushing_caps) + if (ci->i_dirty_caps || ci->i_flushing_caps || + !list_empty(&ci->i_cap_snaps)) goto out; if ((used | wanted) & CEPH_CAP_ANY_WR) goto out; @@ -1417,121 +1471,80 @@ static int trim_caps(struct ceph_mds_client *mdsc, session->s_trim_caps = 0; } - ceph_add_cap_releases(mdsc, session); ceph_send_cap_releases(mdsc, session); return 0; } -/* - * Allocate cap_release messages. If there is a partially full message - * in the queue, try to allocate enough to cover it's remainder, so that - * we can send it immediately. - * - * Called under s_mutex. - */ -int ceph_add_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) +static int check_capsnap_flush(struct ceph_inode_info *ci, + u64 want_snap_seq) { - struct ceph_msg *msg, *partial = NULL; - struct ceph_mds_cap_release *head; - int err = -ENOMEM; - int extra = mdsc->fsc->mount_options->cap_release_safety; - int num; - - dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, - extra); - - spin_lock(&session->s_cap_lock); - - if (!list_empty(&session->s_cap_releases)) { - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, - list_head); - head = msg->front.iov_base; - num = le32_to_cpu(head->num); - if (num) { - dout(" partial %p with (%d/%d)\n", msg, num, - (int)CEPH_CAPS_PER_RELEASE); - extra += CEPH_CAPS_PER_RELEASE - num; - partial = msg; - } - } - while (session->s_num_cap_releases < session->s_nr_caps + extra) { - spin_unlock(&session->s_cap_lock); - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, - GFP_NOFS, false); - if (!msg) - goto out_unlocked; - dout("add_cap_releases %p msg %p now %d\n", session, msg, - (int)msg->front.iov_len); - head = msg->front.iov_base; - head->num = cpu_to_le32(0); - msg->front.iov_len = sizeof(*head); - spin_lock(&session->s_cap_lock); - list_add(&msg->list_head, &session->s_cap_releases); - session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE; - } - - if (partial) { - head = partial->front.iov_base; - num = le32_to_cpu(head->num); - dout(" queueing partial %p with %d/%d\n", partial, num, - (int)CEPH_CAPS_PER_RELEASE); - list_move_tail(&partial->list_head, - &session->s_cap_releases_done); - session->s_num_cap_releases -= CEPH_CAPS_PER_RELEASE - num; + int ret = 1; + spin_lock(&ci->i_ceph_lock); + if (want_snap_seq > 0 && !list_empty(&ci->i_cap_snaps)) { + struct ceph_cap_snap *capsnap = + list_first_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, ci_item); + ret = capsnap->follows >= want_snap_seq; } - err = 0; - spin_unlock(&session->s_cap_lock); -out_unlocked: - return err; + spin_unlock(&ci->i_ceph_lock); + return ret; } -static int check_cap_flush(struct inode *inode, u64 want_flush_seq) +static int check_caps_flush(struct ceph_mds_client *mdsc, + u64 want_flush_tid) { - struct ceph_inode_info *ci = ceph_inode(inode); - int ret; - spin_lock(&ci->i_ceph_lock); - if (ci->i_flushing_caps) - ret = ci->i_cap_flush_seq >= want_flush_seq; - else - ret = 1; - spin_unlock(&ci->i_ceph_lock); + struct rb_node *n; + struct ceph_cap_flush *cf; + int ret = 1; + + spin_lock(&mdsc->cap_dirty_lock); + n = rb_first(&mdsc->cap_flush_tree); + cf = n ? rb_entry(n, struct ceph_cap_flush, g_node) : NULL; + if (cf && cf->tid <= want_flush_tid) { + dout("check_caps_flush still flushing tid %llu <= %llu\n", + cf->tid, want_flush_tid); + ret = 0; + } + spin_unlock(&mdsc->cap_dirty_lock); return ret; } /* * flush all dirty inode data to disk. * - * returns true if we've flushed through want_flush_seq + * returns true if we've flushed through want_flush_tid */ -static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) +static void wait_caps_flush(struct ceph_mds_client *mdsc, + u64 want_flush_tid, u64 want_snap_seq) { int mds; - dout("check_cap_flush want %lld\n", want_flush_seq); + dout("check_caps_flush want %llu snap want %llu\n", + want_flush_tid, want_snap_seq); mutex_lock(&mdsc->mutex); - for (mds = 0; mds < mdsc->max_sessions; mds++) { + for (mds = 0; mds < mdsc->max_sessions; ) { struct ceph_mds_session *session = mdsc->sessions[mds]; struct inode *inode = NULL; - if (!session) + if (!session) { + mds++; continue; + } get_session(session); mutex_unlock(&mdsc->mutex); mutex_lock(&session->s_mutex); - if (!list_empty(&session->s_cap_flushing)) { - struct ceph_inode_info *ci = - list_entry(session->s_cap_flushing.next, - struct ceph_inode_info, - i_flushing_item); - - if (!check_cap_flush(&ci->vfs_inode, want_flush_seq)) { - dout("check_cap_flush still flushing %p " - "seq %lld <= %lld to mds%d\n", - &ci->vfs_inode, ci->i_cap_flush_seq, - want_flush_seq, session->s_mds); + if (!list_empty(&session->s_cap_snaps_flushing)) { + struct ceph_cap_snap *capsnap = + list_first_entry(&session->s_cap_snaps_flushing, + struct ceph_cap_snap, + flushing_item); + struct ceph_inode_info *ci = capsnap->ci; + if (!check_capsnap_flush(ci, want_snap_seq)) { + dout("check_cap_flush still flushing snap %p " + "follows %lld <= %lld to mds%d\n", + &ci->vfs_inode, capsnap->follows, + want_snap_seq, mds); inode = igrab(&ci->vfs_inode); } } @@ -1540,15 +1553,21 @@ static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) if (inode) { wait_event(mdsc->cap_flushing_wq, - check_cap_flush(inode, want_flush_seq)); + check_capsnap_flush(ceph_inode(inode), + want_snap_seq)); iput(inode); + } else { + mds++; } mutex_lock(&mdsc->mutex); } - mutex_unlock(&mdsc->mutex); - dout("check_cap_flush ok, flushed thru %lld\n", want_flush_seq); + + wait_event(mdsc->cap_flushing_wq, + check_caps_flush(mdsc, want_flush_tid)); + + dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid); } /* @@ -1557,60 +1576,74 @@ static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) void ceph_send_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { - struct ceph_msg *msg; + struct ceph_msg *msg = NULL; + struct ceph_mds_cap_release *head; + struct ceph_mds_cap_item *item; + struct ceph_cap *cap; + LIST_HEAD(tmp_list); + int num_cap_releases; - dout("send_cap_releases mds%d\n", session->s_mds); spin_lock(&session->s_cap_lock); - while (!list_empty(&session->s_cap_releases_done)) { - msg = list_first_entry(&session->s_cap_releases_done, - struct ceph_msg, list_head); - list_del_init(&msg->list_head); - spin_unlock(&session->s_cap_lock); - msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - dout("send_cap_releases mds%d %p\n", session->s_mds, msg); - ceph_con_send(&session->s_con, msg); - spin_lock(&session->s_cap_lock); - } +again: + list_splice_init(&session->s_cap_releases, &tmp_list); + num_cap_releases = session->s_num_cap_releases; + session->s_num_cap_releases = 0; spin_unlock(&session->s_cap_lock); -} -static void discard_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) -{ - struct ceph_msg *msg; - struct ceph_mds_cap_release *head; - unsigned num; - - dout("discard_cap_releases mds%d\n", session->s_mds); + while (!list_empty(&tmp_list)) { + if (!msg) { + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, + PAGE_CACHE_SIZE, GFP_NOFS, false); + if (!msg) + goto out_err; + head = msg->front.iov_base; + head->num = cpu_to_le32(0); + msg->front.iov_len = sizeof(*head); + } + cap = list_first_entry(&tmp_list, struct ceph_cap, + session_caps); + list_del(&cap->session_caps); + num_cap_releases--; - if (!list_empty(&session->s_cap_releases)) { - /* zero out the in-progress message */ - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, list_head); head = msg->front.iov_base; - num = le32_to_cpu(head->num); - dout("discard_cap_releases mds%d %p %u\n", - session->s_mds, msg, num); - head->num = cpu_to_le32(0); - msg->front.iov_len = sizeof(*head); - session->s_num_cap_releases += num; + le32_add_cpu(&head->num, 1); + item = msg->front.iov_base + msg->front.iov_len; + item->ino = cpu_to_le64(cap->cap_ino); + item->cap_id = cpu_to_le64(cap->cap_id); + item->migrate_seq = cpu_to_le32(cap->mseq); + item->seq = cpu_to_le32(cap->issue_seq); + msg->front.iov_len += sizeof(*item); + + ceph_put_cap(mdsc, cap); + + if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); + dout("send_cap_releases mds%d %p\n", session->s_mds, msg); + ceph_con_send(&session->s_con, msg); + msg = NULL; + } } - /* requeue completed messages */ - while (!list_empty(&session->s_cap_releases_done)) { - msg = list_first_entry(&session->s_cap_releases_done, - struct ceph_msg, list_head); - list_del_init(&msg->list_head); + BUG_ON(num_cap_releases != 0); - head = msg->front.iov_base; - num = le32_to_cpu(head->num); - dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, - num); - session->s_num_cap_releases += num; - head->num = cpu_to_le32(0); - msg->front.iov_len = sizeof(*head); - list_add(&msg->list_head, &session->s_cap_releases); + spin_lock(&session->s_cap_lock); + if (!list_empty(&session->s_cap_releases)) + goto again; + spin_unlock(&session->s_cap_lock); + + if (msg) { + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); + dout("send_cap_releases mds%d %p\n", session->s_mds, msg); + ceph_con_send(&session->s_con, msg); } + return; +out_err: + pr_err("send_cap_releases mds%d, failed to allocate message\n", + session->s_mds); + spin_lock(&session->s_cap_lock); + list_splice(&tmp_list, &session->s_cap_releases); + session->s_num_cap_releases += num_cap_releases; + spin_unlock(&session->s_cap_lock); } /* @@ -1635,7 +1668,8 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, order = get_order(size * num_entries); while (order >= 0) { - rinfo->dir_in = (void*)__get_free_pages(GFP_NOFS | __GFP_NOWARN, + rinfo->dir_in = (void*)__get_free_pages(GFP_KERNEL | + __GFP_NOWARN, order); if (rinfo->dir_in) break; @@ -1697,13 +1731,9 @@ static struct ceph_mds_request *__get_oldest_req(struct ceph_mds_client *mdsc) struct ceph_mds_request, r_node); } -static u64 __get_oldest_tid(struct ceph_mds_client *mdsc) +static inline u64 __get_oldest_tid(struct ceph_mds_client *mdsc) { - struct ceph_mds_request *req = __get_oldest_req(mdsc); - - if (req) - return req->r_tid; - return 0; + return mdsc->oldest_tid; } /* @@ -2267,15 +2297,18 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, /* wait */ mutex_unlock(&mdsc->mutex); dout("do_request waiting\n"); - if (req->r_timeout) { - err = (long)wait_for_completion_killable_timeout( - &req->r_completion, req->r_timeout); - if (err == 0) - err = -EIO; - } else if (req->r_wait_for_completion) { + if (!req->r_timeout && req->r_wait_for_completion) { err = req->r_wait_for_completion(mdsc, req); } else { - err = wait_for_completion_killable(&req->r_completion); + long timeleft = wait_for_completion_killable_timeout( + &req->r_completion, + ceph_timeout_jiffies(req->r_timeout)); + if (timeleft > 0) + err = 0; + else if (!timeleft) + err = -EIO; /* timed out */ + else + err = timeleft; /* killed */ } dout("do_request waited, got %d\n", err); mutex_lock(&mdsc->mutex); @@ -2496,7 +2529,6 @@ out_err: } mutex_unlock(&mdsc->mutex); - ceph_add_cap_releases(mdsc, req->r_session); mutex_unlock(&session->s_mutex); /* kick calling process */ @@ -2888,8 +2920,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, */ session->s_cap_reconnect = 1; /* drop old cap expires; we're about to reestablish that state */ - discard_cap_releases(mdsc, session); - spin_unlock(&session->s_cap_lock); + cleanup_cap_releases(mdsc, session); /* trim unused caps to reduce MDS's cache rejoin time */ if (mdsc->fsc->sb->s_root) @@ -2956,6 +2987,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, reply->hdr.data_len = cpu_to_le32(pagelist->length); ceph_msg_data_add_pagelist(reply, pagelist); + + ceph_early_kick_flushing_caps(mdsc, session); + ceph_con_send(&session->s_con, reply); mutex_unlock(&session->s_mutex); @@ -3352,7 +3386,6 @@ static void delayed_work(struct work_struct *work) send_renew_caps(mdsc, s); else ceph_con_keepalive(&s->s_con); - ceph_add_cap_releases(mdsc, s); if (s->s_state == CEPH_MDS_SESSION_OPEN || s->s_state == CEPH_MDS_SESSION_HUNG) ceph_send_cap_releases(mdsc, s); @@ -3390,11 +3423,13 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) atomic_set(&mdsc->num_sessions, 0); mdsc->max_sessions = 0; mdsc->stopping = 0; + mdsc->last_snap_seq = 0; init_rwsem(&mdsc->snap_rwsem); mdsc->snap_realms = RB_ROOT; INIT_LIST_HEAD(&mdsc->snap_empty); spin_lock_init(&mdsc->snap_empty_lock); mdsc->last_tid = 0; + mdsc->oldest_tid = 0; mdsc->request_tree = RB_ROOT; INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); mdsc->last_renew_caps = jiffies; @@ -3402,7 +3437,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) spin_lock_init(&mdsc->cap_delay_lock); INIT_LIST_HEAD(&mdsc->snap_flush_list); spin_lock_init(&mdsc->snap_flush_lock); - mdsc->cap_flush_seq = 0; + mdsc->last_cap_flush_tid = 1; + mdsc->cap_flush_tree = RB_ROOT; INIT_LIST_HEAD(&mdsc->cap_dirty); INIT_LIST_HEAD(&mdsc->cap_dirty_migrating); mdsc->num_cap_flushing = 0; @@ -3414,6 +3450,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) ceph_caps_init(mdsc); ceph_adjust_min_caps(mdsc, fsc->min_caps); + init_rwsem(&mdsc->pool_perm_rwsem); + mdsc->pool_perm_tree = RB_ROOT; + return 0; } @@ -3423,8 +3462,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) */ static void wait_requests(struct ceph_mds_client *mdsc) { + struct ceph_options *opts = mdsc->fsc->client->options; struct ceph_mds_request *req; - struct ceph_fs_client *fsc = mdsc->fsc; mutex_lock(&mdsc->mutex); if (__get_oldest_req(mdsc)) { @@ -3432,7 +3471,7 @@ static void wait_requests(struct ceph_mds_client *mdsc) dout("wait_requests waiting for requests\n"); wait_for_completion_timeout(&mdsc->safe_umount_waiters, - fsc->client->options->mount_timeout * HZ); + ceph_timeout_jiffies(opts->mount_timeout)); /* tear down remaining requests */ mutex_lock(&mdsc->mutex); @@ -3485,7 +3524,8 @@ restart: nextreq = rb_entry(n, struct ceph_mds_request, r_node); else nextreq = NULL; - if ((req->r_op & CEPH_MDS_OP_WRITE)) { + if (req->r_op != CEPH_MDS_OP_SETFILELOCK && + (req->r_op & CEPH_MDS_OP_WRITE)) { /* write op */ ceph_mdsc_get_request(req); if (nextreq) @@ -3513,7 +3553,7 @@ restart: void ceph_mdsc_sync(struct ceph_mds_client *mdsc) { - u64 want_tid, want_flush; + u64 want_tid, want_flush, want_snap; if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) return; @@ -3525,13 +3565,18 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) ceph_flush_dirty_caps(mdsc); spin_lock(&mdsc->cap_dirty_lock); - want_flush = mdsc->cap_flush_seq; + want_flush = mdsc->last_cap_flush_tid; spin_unlock(&mdsc->cap_dirty_lock); - dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush); + down_read(&mdsc->snap_rwsem); + want_snap = mdsc->last_snap_seq; + up_read(&mdsc->snap_rwsem); + + dout("sync want tid %lld flush_seq %lld snap_seq %lld\n", + want_tid, want_flush, want_snap); wait_unsafe_requests(mdsc, want_tid); - wait_caps_flush(mdsc, want_flush); + wait_caps_flush(mdsc, want_flush, want_snap); } /* @@ -3549,10 +3594,9 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc) */ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) { + struct ceph_options *opts = mdsc->fsc->client->options; struct ceph_mds_session *session; int i; - struct ceph_fs_client *fsc = mdsc->fsc; - unsigned long timeout = fsc->client->options->mount_timeout * HZ; dout("close_sessions\n"); @@ -3573,7 +3617,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) dout("waiting for sessions to close\n"); wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc), - timeout); + ceph_timeout_jiffies(opts->mount_timeout)); /* tear down remaining sessions */ mutex_lock(&mdsc->mutex); @@ -3607,6 +3651,7 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) ceph_mdsmap_destroy(mdsc->mdsmap); kfree(mdsc->sessions); ceph_caps_finalize(mdsc); + ceph_pool_perm_destroy(mdsc); } void ceph_mdsc_destroy(struct ceph_fs_client *fsc) diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 1875b5d985c6..762757e6cebf 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -139,7 +139,6 @@ struct ceph_mds_session { int s_cap_reconnect; int s_readonly; struct list_head s_cap_releases; /* waiting cap_release messages */ - struct list_head s_cap_releases_done; /* ready to send */ struct ceph_cap *s_cap_iterator; /* protected by mutex */ @@ -228,7 +227,7 @@ struct ceph_mds_request { int r_err; bool r_aborted; - unsigned long r_timeout; /* optional. jiffies */ + unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */ unsigned long r_started; /* start time to measure timeout against */ unsigned long r_request_started; /* start time for mds request only, used to measure lease durations */ @@ -254,12 +253,21 @@ struct ceph_mds_request { bool r_got_unsafe, r_got_safe, r_got_result; bool r_did_prepopulate; + long long r_dir_release_cnt; + long long r_dir_ordered_cnt; + int r_readdir_cache_idx; u32 r_readdir_offset; struct ceph_cap_reservation r_caps_reservation; int r_num_caps; }; +struct ceph_pool_perm { + struct rb_node node; + u32 pool; + int perm; +}; + /* * mds client state */ @@ -284,12 +292,15 @@ struct ceph_mds_client { * references (implying they contain no inodes with caps) that * should be destroyed. */ + u64 last_snap_seq; struct rw_semaphore snap_rwsem; struct rb_root snap_realms; struct list_head snap_empty; spinlock_t snap_empty_lock; /* protect snap_empty */ u64 last_tid; /* most recent mds request */ + u64 oldest_tid; /* oldest incomplete mds request, + excluding setfilelock requests */ struct rb_root request_tree; /* pending mds requests */ struct delayed_work delayed_work; /* delayed work */ unsigned long last_renew_caps; /* last time we renewed our caps */ @@ -298,7 +309,8 @@ struct ceph_mds_client { struct list_head snap_flush_list; /* cap_snaps ready to flush */ spinlock_t snap_flush_lock; - u64 cap_flush_seq; + u64 last_cap_flush_tid; + struct rb_root cap_flush_tree; struct list_head cap_dirty; /* inodes with dirty caps */ struct list_head cap_dirty_migrating; /* ...that are migration... */ int num_cap_flushing; /* # caps we are flushing */ @@ -328,6 +340,9 @@ struct ceph_mds_client { spinlock_t dentry_lru_lock; struct list_head dentry_lru; int num_dentry; + + struct rw_semaphore pool_perm_rwsem; + struct rb_root pool_perm_tree; }; extern const char *ceph_mds_op_name(int op); @@ -379,8 +394,6 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) kref_put(&req->r_kref, ceph_mdsc_release_request); } -extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session); extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index a97e39f09ba6..233d906aec02 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -296,7 +296,7 @@ static int cmpu64_rev(const void *a, const void *b) } -static struct ceph_snap_context *empty_snapc; +struct ceph_snap_context *ceph_empty_snapc; /* * build the snap context for a given realm. @@ -338,9 +338,9 @@ static int build_snap_context(struct ceph_snap_realm *realm) return 0; } - if (num == 0 && realm->seq == empty_snapc->seq) { - ceph_get_snap_context(empty_snapc); - snapc = empty_snapc; + if (num == 0 && realm->seq == ceph_empty_snapc->seq) { + ceph_get_snap_context(ceph_empty_snapc); + snapc = ceph_empty_snapc; goto done; } @@ -436,6 +436,14 @@ static int dup_array(u64 **dst, __le64 *src, u32 num) return 0; } +static bool has_new_snaps(struct ceph_snap_context *o, + struct ceph_snap_context *n) +{ + if (n->num_snaps == 0) + return false; + /* snaps are in descending order */ + return n->snaps[0] > o->seq; +} /* * When a snapshot is applied, the size/mtime inode metadata is queued @@ -455,6 +463,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) { struct inode *inode = &ci->vfs_inode; struct ceph_cap_snap *capsnap; + struct ceph_snap_context *old_snapc, *new_snapc; int used, dirty; capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); @@ -467,6 +476,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) used = __ceph_caps_used(ci); dirty = __ceph_caps_dirty(ci); + old_snapc = ci->i_head_snapc; + new_snapc = ci->i_snap_realm->cached_context; + /* * If there is a write in progress, treat that as a dirty Fw, * even though it hasn't completed yet; by the time we finish @@ -481,76 +493,95 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) writes in progress now were started before the previous cap_snap. lucky us. */ dout("queue_cap_snap %p already pending\n", inode); - kfree(capsnap); - } else if (ci->i_snap_realm->cached_context == empty_snapc) { - dout("queue_cap_snap %p empty snapc\n", inode); - kfree(capsnap); - } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| - CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) { - struct ceph_snap_context *snapc = ci->i_head_snapc; - - /* - * if we are a sync write, we may need to go to the snaprealm - * to get the current snapc. - */ - if (!snapc) - snapc = ci->i_snap_realm->cached_context; + goto update_snapc; + } + if (ci->i_wrbuffer_ref_head == 0 && + !(dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR))) { + dout("queue_cap_snap %p nothing dirty|writing\n", inode); + goto update_snapc; + } - dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n", - inode, capsnap, snapc, ceph_cap_string(dirty)); - ihold(inode); + BUG_ON(!old_snapc); - atomic_set(&capsnap->nref, 1); - capsnap->ci = ci; - INIT_LIST_HEAD(&capsnap->ci_item); - INIT_LIST_HEAD(&capsnap->flushing_item); - - capsnap->follows = snapc->seq; - capsnap->issued = __ceph_caps_issued(ci, NULL); - capsnap->dirty = dirty; - - capsnap->mode = inode->i_mode; - capsnap->uid = inode->i_uid; - capsnap->gid = inode->i_gid; - - if (dirty & CEPH_CAP_XATTR_EXCL) { - __ceph_build_xattrs_blob(ci); - capsnap->xattr_blob = - ceph_buffer_get(ci->i_xattrs.blob); - capsnap->xattr_version = ci->i_xattrs.version; - } else { - capsnap->xattr_blob = NULL; - capsnap->xattr_version = 0; + /* + * There is no need to send FLUSHSNAP message to MDS if there is + * no new snapshot. But when there is dirty pages or on-going + * writes, we still need to create cap_snap. cap_snap is needed + * by the write path and page writeback path. + * + * also see ceph_try_drop_cap_snap() + */ + if (has_new_snaps(old_snapc, new_snapc)) { + if (dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR)) + capsnap->need_flush = true; + } else { + if (!(used & CEPH_CAP_FILE_WR) && + ci->i_wrbuffer_ref_head == 0) { + dout("queue_cap_snap %p " + "no new_snap|dirty_page|writing\n", inode); + goto update_snapc; } + } - capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE; - - /* dirty page count moved from _head to this cap_snap; - all subsequent writes page dirties occur _after_ this - snapshot. */ - capsnap->dirty_pages = ci->i_wrbuffer_ref_head; - ci->i_wrbuffer_ref_head = 0; - capsnap->context = snapc; - ci->i_head_snapc = - ceph_get_snap_context(ci->i_snap_realm->cached_context); - dout(" new snapc is %p\n", ci->i_head_snapc); - list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); - - if (used & CEPH_CAP_FILE_WR) { - dout("queue_cap_snap %p cap_snap %p snapc %p" - " seq %llu used WR, now pending\n", inode, - capsnap, snapc, snapc->seq); - capsnap->writing = 1; - } else { - /* note mtime, size NOW. */ - __ceph_finish_cap_snap(ci, capsnap); - } + dout("queue_cap_snap %p cap_snap %p queuing under %p %s %s\n", + inode, capsnap, old_snapc, ceph_cap_string(dirty), + capsnap->need_flush ? "" : "no_flush"); + ihold(inode); + + atomic_set(&capsnap->nref, 1); + capsnap->ci = ci; + INIT_LIST_HEAD(&capsnap->ci_item); + INIT_LIST_HEAD(&capsnap->flushing_item); + + capsnap->follows = old_snapc->seq; + capsnap->issued = __ceph_caps_issued(ci, NULL); + capsnap->dirty = dirty; + + capsnap->mode = inode->i_mode; + capsnap->uid = inode->i_uid; + capsnap->gid = inode->i_gid; + + if (dirty & CEPH_CAP_XATTR_EXCL) { + __ceph_build_xattrs_blob(ci); + capsnap->xattr_blob = + ceph_buffer_get(ci->i_xattrs.blob); + capsnap->xattr_version = ci->i_xattrs.version; } else { - dout("queue_cap_snap %p nothing dirty|writing\n", inode); - kfree(capsnap); + capsnap->xattr_blob = NULL; + capsnap->xattr_version = 0; } + capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE; + + /* dirty page count moved from _head to this cap_snap; + all subsequent writes page dirties occur _after_ this + snapshot. */ + capsnap->dirty_pages = ci->i_wrbuffer_ref_head; + ci->i_wrbuffer_ref_head = 0; + capsnap->context = old_snapc; + list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); + old_snapc = NULL; + + if (used & CEPH_CAP_FILE_WR) { + dout("queue_cap_snap %p cap_snap %p snapc %p" + " seq %llu used WR, now pending\n", inode, + capsnap, old_snapc, old_snapc->seq); + capsnap->writing = 1; + } else { + /* note mtime, size NOW. */ + __ceph_finish_cap_snap(ci, capsnap); + } + capsnap = NULL; + +update_snapc: + if (ci->i_head_snapc) { + ci->i_head_snapc = ceph_get_snap_context(new_snapc); + dout(" new snapc is %p\n", new_snapc); + } spin_unlock(&ci->i_ceph_lock); + + kfree(capsnap); + ceph_put_snap_context(old_snapc); } /* @@ -699,6 +730,8 @@ more: /* queue realm for cap_snap creation */ list_add(&realm->dirty_item, &dirty_realms); + if (realm->seq > mdsc->last_snap_seq) + mdsc->last_snap_seq = realm->seq; invalidate = 1; } else if (!realm->cached_context) { @@ -964,14 +997,14 @@ out: int __init ceph_snap_init(void) { - empty_snapc = ceph_create_snap_context(0, GFP_NOFS); - if (!empty_snapc) + ceph_empty_snapc = ceph_create_snap_context(0, GFP_NOFS); + if (!ceph_empty_snapc) return -ENOMEM; - empty_snapc->seq = 1; + ceph_empty_snapc->seq = 1; return 0; } void ceph_snap_exit(void) { - ceph_put_snap_context(empty_snapc); + ceph_put_snap_context(ceph_empty_snapc); } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 4e9905374078..d1c833c321b9 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -134,10 +134,12 @@ enum { Opt_noino32, Opt_fscache, Opt_nofscache, + Opt_poolperm, + Opt_nopoolperm, #ifdef CONFIG_CEPH_FS_POSIX_ACL Opt_acl, #endif - Opt_noacl + Opt_noacl, }; static match_table_t fsopt_tokens = { @@ -165,6 +167,8 @@ static match_table_t fsopt_tokens = { {Opt_noino32, "noino32"}, {Opt_fscache, "fsc"}, {Opt_nofscache, "nofsc"}, + {Opt_poolperm, "poolperm"}, + {Opt_nopoolperm, "nopoolperm"}, #ifdef CONFIG_CEPH_FS_POSIX_ACL {Opt_acl, "acl"}, #endif @@ -268,6 +272,13 @@ static int parse_fsopt_token(char *c, void *private) case Opt_nofscache: fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; break; + case Opt_poolperm: + fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; + printk ("pool perm"); + break; + case Opt_nopoolperm: + fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; + break; #ifdef CONFIG_CEPH_FS_POSIX_ACL case Opt_acl: fsopt->sb_flags |= MS_POSIXACL; @@ -436,6 +447,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",nodcache"); if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) seq_puts(m, ",fsc"); + if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) + seq_puts(m, ",nopoolperm"); #ifdef CONFIG_CEPH_FS_POSIX_ACL if (fsopt->sb_flags & MS_POSIXACL) @@ -609,6 +622,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) */ struct kmem_cache *ceph_inode_cachep; struct kmem_cache *ceph_cap_cachep; +struct kmem_cache *ceph_cap_flush_cachep; struct kmem_cache *ceph_dentry_cachep; struct kmem_cache *ceph_file_cachep; @@ -634,6 +648,10 @@ static int __init init_caches(void) SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); if (ceph_cap_cachep == NULL) goto bad_cap; + ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); + if (ceph_cap_flush_cachep == NULL) + goto bad_cap_flush; ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); @@ -652,6 +670,8 @@ static int __init init_caches(void) bad_file: kmem_cache_destroy(ceph_dentry_cachep); bad_dentry: + kmem_cache_destroy(ceph_cap_flush_cachep); +bad_cap_flush: kmem_cache_destroy(ceph_cap_cachep); bad_cap: kmem_cache_destroy(ceph_inode_cachep); @@ -668,6 +688,7 @@ static void destroy_caches(void) kmem_cache_destroy(ceph_inode_cachep); kmem_cache_destroy(ceph_cap_cachep); + kmem_cache_destroy(ceph_cap_flush_cachep); kmem_cache_destroy(ceph_dentry_cachep); kmem_cache_destroy(ceph_file_cachep); @@ -729,7 +750,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, req->r_ino1.ino = CEPH_INO_ROOT; req->r_ino1.snap = CEPH_NOSNAP; req->r_started = started; - req->r_timeout = fsc->client->options->mount_timeout * HZ; + req->r_timeout = fsc->client->options->mount_timeout; req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); req->r_num_caps = 2; err = ceph_mdsc_do_request(mdsc, NULL, req); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index fa20e1318939..860cc016e70d 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -35,6 +35,7 @@ #define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */ #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ +#define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */ #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES | \ CEPH_MOUNT_OPT_DCACHE) @@ -121,11 +122,21 @@ struct ceph_cap { struct rb_node ci_node; /* per-ci cap tree */ struct ceph_mds_session *session; struct list_head session_caps; /* per-session caplist */ - int mds; u64 cap_id; /* unique cap id (mds provided) */ - int issued; /* latest, from the mds */ - int implemented; /* implemented superset of issued (for revocation) */ - int mds_wanted; + union { + /* in-use caps */ + struct { + int issued; /* latest, from the mds */ + int implemented; /* implemented superset of + issued (for revocation) */ + int mds, mds_wanted; + }; + /* caps to release */ + struct { + u64 cap_ino; + int queue_release; + }; + }; u32 seq, issue_seq, mseq; u32 cap_gen; /* active/stale cycle */ unsigned long last_used; @@ -163,6 +174,7 @@ struct ceph_cap_snap { int writing; /* a sync write is still in progress */ int dirty_pages; /* dirty pages awaiting writeback */ bool inline_data; + bool need_flush; }; static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) @@ -174,6 +186,17 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) } } +struct ceph_cap_flush { + u64 tid; + int caps; + bool kick; + struct rb_node g_node; // global + union { + struct rb_node i_node; // inode + struct list_head list; + }; +}; + /* * The frag tree describes how a directory is fragmented, potentially across * multiple metadata servers. It is also used to indicate points where @@ -259,9 +282,9 @@ struct ceph_inode_info { u32 i_time_warp_seq; unsigned i_ceph_flags; - int i_ordered_count; - atomic_t i_release_count; - atomic_t i_complete_count; + atomic64_t i_release_count; + atomic64_t i_ordered_count; + atomic64_t i_complete_seq[2]; struct ceph_dir_layout i_dir_layout; struct ceph_file_layout i_layout; @@ -283,11 +306,11 @@ struct ceph_inode_info { struct ceph_cap *i_auth_cap; /* authoritative cap, if any */ unsigned i_dirty_caps, i_flushing_caps; /* mask of dirtied fields */ struct list_head i_dirty_item, i_flushing_item; - u64 i_cap_flush_seq; /* we need to track cap writeback on a per-cap-bit basis, to allow * overlapping, pipelined cap flushes to the mds. we can probably * reduce the tid to 8 bits if we're concerned about inode size. */ - u16 i_cap_flush_last_tid, i_cap_flush_tid[CEPH_CAP_BITS]; + struct ceph_cap_flush *i_prealloc_cap_flush; + struct rb_root i_cap_flush_tree; wait_queue_head_t i_cap_wq; /* threads waiting on a capability */ unsigned long i_hold_caps_min; /* jiffies */ unsigned long i_hold_caps_max; /* jiffies */ @@ -438,36 +461,46 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, /* * Ceph inode. */ -#define CEPH_I_DIR_ORDERED 1 /* dentries in dir are ordered */ -#define CEPH_I_NODELAY 4 /* do not delay cap release */ -#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ -#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ +#define CEPH_I_DIR_ORDERED (1 << 0) /* dentries in dir are ordered */ +#define CEPH_I_NODELAY (1 << 1) /* do not delay cap release */ +#define CEPH_I_FLUSH (1 << 2) /* do not delay flush of dirty metadata */ +#define CEPH_I_NOFLUSH (1 << 3) /* do not flush dirty caps */ +#define CEPH_I_POOL_PERM (1 << 4) /* pool rd/wr bits are valid */ +#define CEPH_I_POOL_RD (1 << 5) /* can read from pool */ +#define CEPH_I_POOL_WR (1 << 6) /* can write to pool */ + static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, - int release_count, int ordered_count) + long long release_count, + long long ordered_count) { - atomic_set(&ci->i_complete_count, release_count); - if (ci->i_ordered_count == ordered_count) - ci->i_ceph_flags |= CEPH_I_DIR_ORDERED; - else - ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED; + smp_mb__before_atomic(); + atomic64_set(&ci->i_complete_seq[0], release_count); + atomic64_set(&ci->i_complete_seq[1], ordered_count); } static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci) { - atomic_inc(&ci->i_release_count); + atomic64_inc(&ci->i_release_count); +} + +static inline void __ceph_dir_clear_ordered(struct ceph_inode_info *ci) +{ + atomic64_inc(&ci->i_ordered_count); } static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci) { - return atomic_read(&ci->i_complete_count) == - atomic_read(&ci->i_release_count); + return atomic64_read(&ci->i_complete_seq[0]) == + atomic64_read(&ci->i_release_count); } static inline bool __ceph_dir_is_complete_ordered(struct ceph_inode_info *ci) { - return __ceph_dir_is_complete(ci) && - (ci->i_ceph_flags & CEPH_I_DIR_ORDERED); + return atomic64_read(&ci->i_complete_seq[0]) == + atomic64_read(&ci->i_release_count) && + atomic64_read(&ci->i_complete_seq[1]) == + atomic64_read(&ci->i_ordered_count); } static inline void ceph_dir_clear_complete(struct inode *inode) @@ -477,20 +510,13 @@ static inline void ceph_dir_clear_complete(struct inode *inode) static inline void ceph_dir_clear_ordered(struct inode *inode) { - struct ceph_inode_info *ci = ceph_inode(inode); - spin_lock(&ci->i_ceph_lock); - ci->i_ordered_count++; - ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED; - spin_unlock(&ci->i_ceph_lock); + __ceph_dir_clear_ordered(ceph_inode(inode)); } static inline bool ceph_dir_is_complete_ordered(struct inode *inode) { - struct ceph_inode_info *ci = ceph_inode(inode); - bool ret; - spin_lock(&ci->i_ceph_lock); - ret = __ceph_dir_is_complete_ordered(ci); - spin_unlock(&ci->i_ceph_lock); + bool ret = __ceph_dir_is_complete_ordered(ceph_inode(inode)); + smp_rmb(); return ret; } @@ -552,7 +578,10 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci) { return ci->i_dirty_caps | ci->i_flushing_caps; } -extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask); +extern struct ceph_cap_flush *ceph_alloc_cap_flush(void); +extern void ceph_free_cap_flush(struct ceph_cap_flush *cf); +extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, + struct ceph_cap_flush **pcf); extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci, struct ceph_cap *ocap, int mask); @@ -606,16 +635,20 @@ struct ceph_file_info { unsigned offset; /* offset of last chunk, adjusted for . and .. */ unsigned next_offset; /* offset of next chunk (last_name's + 1) */ char *last_name; /* last entry in previous chunk */ - struct dentry *dentry; /* next dentry (for dcache readdir) */ - int dir_release_count; - int dir_ordered_count; + long long dir_release_count; + long long dir_ordered_count; + int readdir_cache_idx; /* used for -o dirstat read() on directory thing */ char *dir_info; int dir_info_len; }; - +struct ceph_readdir_cache_control { + struct page *page; + struct dentry **dentries; + int index; +}; /* * A "snap realm" describes a subset of the file hierarchy sharing @@ -687,6 +720,7 @@ static inline int default_congestion_kb(void) /* snap.c */ +extern struct ceph_snap_context *ceph_empty_snapc; struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc, u64 ino); extern void ceph_get_snap_realm(struct ceph_mds_client *mdsc, @@ -713,8 +747,8 @@ extern void ceph_snap_exit(void); static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci) { return !list_empty(&ci->i_cap_snaps) && - list_entry(ci->i_cap_snaps.prev, struct ceph_cap_snap, - ci_item)->writing; + list_last_entry(&ci->i_cap_snaps, struct ceph_cap_snap, + ci_item)->writing; } /* inode.c */ @@ -838,12 +872,12 @@ extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); extern int ceph_is_any_caps(struct inode *inode); -extern void __queue_cap_release(struct ceph_mds_session *session, u64 ino, - u64 cap_id, u32 migrate_seq, u32 issue_seq); extern void ceph_queue_caps_release(struct inode *inode); extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); extern int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync); +extern void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session); extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, @@ -879,6 +913,9 @@ extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode); /* addr.c */ extern const struct address_space_operations ceph_aops; extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); +extern int ceph_uninline_data(struct file *filp, struct page *locked_page); +extern int ceph_pool_perm_check(struct ceph_inode_info *ci, int need); +extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc); /* file.c */ extern const struct file_operations ceph_file_fops; @@ -890,7 +927,6 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, extern int ceph_release(struct inode *inode, struct file *filp); extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, char *data, size_t len); -int ceph_uninline_data(struct file *filp, struct page *locked_page); /* dir.c */ extern const struct file_operations ceph_dir_fops; extern const struct file_operations ceph_snapdir_fops; @@ -911,6 +947,7 @@ extern void ceph_dentry_lru_del(struct dentry *dn); extern void ceph_invalidate_dentry_lease(struct dentry *dentry); extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn); extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry); +extern void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl); /* * our d_ops vary depending on whether the inode is live, diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index cd7ffad4041d..819163d8313b 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -911,6 +911,8 @@ int __ceph_setxattr(struct dentry *dentry, const char *name, struct inode *inode = d_inode(dentry); struct ceph_vxattr *vxattr; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; + struct ceph_cap_flush *prealloc_cf = NULL; int issued; int err; int dirty = 0; @@ -920,6 +922,7 @@ int __ceph_setxattr(struct dentry *dentry, const char *name, char *newval = NULL; struct ceph_inode_xattr *xattr = NULL; int required_blob_size; + bool lock_snap_rwsem = false; if (!ceph_is_valid_xattr(name)) return -EOPNOTSUPP; @@ -948,12 +951,27 @@ int __ceph_setxattr(struct dentry *dentry, const char *name, if (!xattr) goto out; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + goto out; + spin_lock(&ci->i_ceph_lock); retry: issued = __ceph_caps_issued(ci, NULL); - dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) goto do_sync; + + if (!lock_snap_rwsem && !ci->i_head_snapc) { + lock_snap_rwsem = true; + if (!down_read_trylock(&mdsc->snap_rwsem)) { + spin_unlock(&ci->i_ceph_lock); + down_read(&mdsc->snap_rwsem); + spin_lock(&ci->i_ceph_lock); + goto retry; + } + } + + dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); __build_xattrs(inode); required_blob_size = __get_required_blob_size(ci, name_len, val_len); @@ -966,7 +984,7 @@ retry: dout(" preaallocating new blob size=%d\n", required_blob_size); blob = ceph_buffer_new(required_blob_size, GFP_NOFS); if (!blob) - goto out; + goto do_sync_unlocked; spin_lock(&ci->i_ceph_lock); if (ci->i_xattrs.prealloc_blob) ceph_buffer_put(ci->i_xattrs.prealloc_blob); @@ -978,21 +996,28 @@ retry: flags, value ? 1 : -1, &xattr); if (!err) { - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, + &prealloc_cf); ci->i_xattrs.dirty = true; inode->i_ctime = CURRENT_TIME; } spin_unlock(&ci->i_ceph_lock); + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); if (dirty) __mark_inode_dirty(inode, dirty); + ceph_free_cap_flush(prealloc_cf); return err; do_sync: spin_unlock(&ci->i_ceph_lock); do_sync_unlocked: + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); err = ceph_sync_setxattr(dentry, name, value, size, flags); out: + ceph_free_cap_flush(prealloc_cf); kfree(newname); kfree(newval); kfree(xattr); @@ -1044,10 +1069,13 @@ int __ceph_removexattr(struct dentry *dentry, const char *name) struct inode *inode = d_inode(dentry); struct ceph_vxattr *vxattr; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; + struct ceph_cap_flush *prealloc_cf = NULL; int issued; int err; int required_blob_size; int dirty; + bool lock_snap_rwsem = false; if (!ceph_is_valid_xattr(name)) return -EOPNOTSUPP; @@ -1060,14 +1088,29 @@ int __ceph_removexattr(struct dentry *dentry, const char *name) if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) goto do_sync_unlocked; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return -ENOMEM; + err = -ENOMEM; spin_lock(&ci->i_ceph_lock); retry: issued = __ceph_caps_issued(ci, NULL); - dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); - if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) goto do_sync; + + if (!lock_snap_rwsem && !ci->i_head_snapc) { + lock_snap_rwsem = true; + if (!down_read_trylock(&mdsc->snap_rwsem)) { + spin_unlock(&ci->i_ceph_lock); + down_read(&mdsc->snap_rwsem); + spin_lock(&ci->i_ceph_lock); + goto retry; + } + } + + dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); + __build_xattrs(inode); required_blob_size = __get_required_blob_size(ci, 0, 0); @@ -1080,7 +1123,7 @@ retry: dout(" preaallocating new blob size=%d\n", required_blob_size); blob = ceph_buffer_new(required_blob_size, GFP_NOFS); if (!blob) - goto out; + goto do_sync_unlocked; spin_lock(&ci->i_ceph_lock); if (ci->i_xattrs.prealloc_blob) ceph_buffer_put(ci->i_xattrs.prealloc_blob); @@ -1090,18 +1133,24 @@ retry: err = __remove_xattr_by_name(ceph_inode(inode), name); - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, + &prealloc_cf); ci->i_xattrs.dirty = true; inode->i_ctime = CURRENT_TIME; spin_unlock(&ci->i_ceph_lock); + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); if (dirty) __mark_inode_dirty(inode, dirty); + ceph_free_cap_flush(prealloc_cf); return err; do_sync: spin_unlock(&ci->i_ceph_lock); do_sync_unlocked: + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); + ceph_free_cap_flush(prealloc_cf); err = ceph_send_removexattr(dentry, name); -out: return err; } diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index e5bbf748b698..eae2c11268bc 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -489,6 +489,7 @@ static void cuse_fc_release(struct fuse_conn *fc) */ static int cuse_channel_open(struct inode *inode, struct file *file) { + struct fuse_dev *fud; struct cuse_conn *cc; int rc; @@ -499,17 +500,22 @@ static int cuse_channel_open(struct inode *inode, struct file *file) fuse_conn_init(&cc->fc); + fud = fuse_dev_alloc(&cc->fc); + if (!fud) { + kfree(cc); + return -ENOMEM; + } + INIT_LIST_HEAD(&cc->list); cc->fc.release = cuse_fc_release; - cc->fc.connected = 1; cc->fc.initialized = 1; rc = cuse_send_init(cc); if (rc) { - fuse_conn_put(&cc->fc); + fuse_dev_free(fud); return rc; } - file->private_data = &cc->fc; /* channel owns base reference to cc */ + file->private_data = fud; return 0; } @@ -527,7 +533,8 @@ static int cuse_channel_open(struct inode *inode, struct file *file) */ static int cuse_channel_release(struct inode *inode, struct file *file) { - struct cuse_conn *cc = fc_to_cc(file->private_data); + struct fuse_dev *fud = file->private_data; + struct cuse_conn *cc = fc_to_cc(fud->fc); int rc; /* remove from the conntbl, no more access from this point on */ diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c8b68ab2e574..80cc1b35d460 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -25,13 +25,13 @@ MODULE_ALIAS("devname:fuse"); static struct kmem_cache *fuse_req_cachep; -static struct fuse_conn *fuse_get_conn(struct file *file) +static struct fuse_dev *fuse_get_dev(struct file *file) { /* * Lockless access is OK, because file->private data is set * once during mount and is valid until the file is released. */ - return file->private_data; + return ACCESS_ONCE(file->private_data); } static void fuse_request_init(struct fuse_req *req, struct page **pages, @@ -48,6 +48,7 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages, req->pages = pages; req->page_descs = page_descs; req->max_pages = npages; + __set_bit(FR_PENDING, &req->flags); } static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags) @@ -168,6 +169,10 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, if (!fc->connected) goto out; + err = -ECONNREFUSED; + if (fc->conn_error) + goto out; + req = fuse_request_alloc(npages); err = -ENOMEM; if (!req) { @@ -177,8 +182,10 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, } fuse_req_init_context(req); - req->waiting = 1; - req->background = for_background; + __set_bit(FR_WAITING, &req->flags); + if (for_background) + __set_bit(FR_BACKGROUND, &req->flags); + return req; out: @@ -268,15 +275,15 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, req = get_reserved_req(fc, file); fuse_req_init_context(req); - req->waiting = 1; - req->background = 0; + __set_bit(FR_WAITING, &req->flags); + __clear_bit(FR_BACKGROUND, &req->flags); return req; } void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) { if (atomic_dec_and_test(&req->count)) { - if (unlikely(req->background)) { + if (test_bit(FR_BACKGROUND, &req->flags)) { /* * We get here in the unlikely case that a background * request was allocated but not sent @@ -287,8 +294,10 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) spin_unlock(&fc->lock); } - if (req->waiting) + if (test_bit(FR_WAITING, &req->flags)) { + __clear_bit(FR_WAITING, &req->flags); atomic_dec(&fc->num_waiting); + } if (req->stolen_file) put_reserved_req(fc, req); @@ -309,46 +318,38 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args) return nbytes; } -static u64 fuse_get_unique(struct fuse_conn *fc) +static u64 fuse_get_unique(struct fuse_iqueue *fiq) { - fc->reqctr++; - /* zero is special */ - if (fc->reqctr == 0) - fc->reqctr = 1; - - return fc->reqctr; + return ++fiq->reqctr; } -static void queue_request(struct fuse_conn *fc, struct fuse_req *req) +static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req) { req->in.h.len = sizeof(struct fuse_in_header) + len_args(req->in.numargs, (struct fuse_arg *) req->in.args); - list_add_tail(&req->list, &fc->pending); - req->state = FUSE_REQ_PENDING; - if (!req->waiting) { - req->waiting = 1; - atomic_inc(&fc->num_waiting); - } - wake_up(&fc->waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); + list_add_tail(&req->list, &fiq->pending); + wake_up_locked(&fiq->waitq); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, u64 nodeid, u64 nlookup) { + struct fuse_iqueue *fiq = &fc->iq; + forget->forget_one.nodeid = nodeid; forget->forget_one.nlookup = nlookup; - spin_lock(&fc->lock); - if (fc->connected) { - fc->forget_list_tail->next = forget; - fc->forget_list_tail = forget; - wake_up(&fc->waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); + spin_lock(&fiq->waitq.lock); + if (fiq->connected) { + fiq->forget_list_tail->next = forget; + fiq->forget_list_tail = forget; + wake_up_locked(&fiq->waitq); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } else { kfree(forget); } - spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); } static void flush_bg_queue(struct fuse_conn *fc) @@ -356,12 +357,15 @@ static void flush_bg_queue(struct fuse_conn *fc) while (fc->active_background < fc->max_background && !list_empty(&fc->bg_queue)) { struct fuse_req *req; + struct fuse_iqueue *fiq = &fc->iq; req = list_entry(fc->bg_queue.next, struct fuse_req, list); list_del(&req->list); fc->active_background++; - req->in.h.unique = fuse_get_unique(fc); - queue_request(fc, req); + spin_lock(&fiq->waitq.lock); + req->in.h.unique = fuse_get_unique(fiq); + queue_request(fiq, req); + spin_unlock(&fiq->waitq.lock); } } @@ -372,20 +376,22 @@ static void flush_bg_queue(struct fuse_conn *fc) * was closed. The requester thread is woken up (if still waiting), * the 'end' callback is called if given, else the reference to the * request is released - * - * Called with fc->lock, unlocks it */ static void request_end(struct fuse_conn *fc, struct fuse_req *req) -__releases(fc->lock) { - void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; - req->end = NULL; - list_del(&req->list); - list_del(&req->intr_entry); - req->state = FUSE_REQ_FINISHED; - if (req->background) { - req->background = 0; + struct fuse_iqueue *fiq = &fc->iq; + + if (test_and_set_bit(FR_FINISHED, &req->flags)) + return; + spin_lock(&fiq->waitq.lock); + list_del_init(&req->intr_entry); + spin_unlock(&fiq->waitq.lock); + WARN_ON(test_bit(FR_PENDING, &req->flags)); + WARN_ON(test_bit(FR_SENT, &req->flags)); + if (test_bit(FR_BACKGROUND, &req->flags)) { + spin_lock(&fc->lock); + clear_bit(FR_BACKGROUND, &req->flags); if (fc->num_background == fc->max_background) fc->blocked = 0; @@ -401,122 +407,105 @@ __releases(fc->lock) fc->num_background--; fc->active_background--; flush_bg_queue(fc); + spin_unlock(&fc->lock); } - spin_unlock(&fc->lock); wake_up(&req->waitq); - if (end) - end(fc, req); + if (req->end) + req->end(fc, req); fuse_put_request(fc, req); } -static void wait_answer_interruptible(struct fuse_conn *fc, - struct fuse_req *req) -__releases(fc->lock) -__acquires(fc->lock) -{ - if (signal_pending(current)) - return; - - spin_unlock(&fc->lock); - wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED); - spin_lock(&fc->lock); -} - -static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) +static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) { - list_add_tail(&req->intr_entry, &fc->interrupts); - wake_up(&fc->waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); + spin_lock(&fiq->waitq.lock); + if (list_empty(&req->intr_entry)) { + list_add_tail(&req->intr_entry, &fiq->interrupts); + wake_up_locked(&fiq->waitq); + } + spin_unlock(&fiq->waitq.lock); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) -__releases(fc->lock) -__acquires(fc->lock) { + struct fuse_iqueue *fiq = &fc->iq; + int err; + if (!fc->no_interrupt) { /* Any signal may interrupt this */ - wait_answer_interruptible(fc, req); - - if (req->aborted) - goto aborted; - if (req->state == FUSE_REQ_FINISHED) + err = wait_event_interruptible(req->waitq, + test_bit(FR_FINISHED, &req->flags)); + if (!err) return; - req->interrupted = 1; - if (req->state == FUSE_REQ_SENT) - queue_interrupt(fc, req); + set_bit(FR_INTERRUPTED, &req->flags); + /* matches barrier in fuse_dev_do_read() */ + smp_mb__after_atomic(); + if (test_bit(FR_SENT, &req->flags)) + queue_interrupt(fiq, req); } - if (!req->force) { + if (!test_bit(FR_FORCE, &req->flags)) { sigset_t oldset; /* Only fatal signals may interrupt this */ block_sigs(&oldset); - wait_answer_interruptible(fc, req); + err = wait_event_interruptible(req->waitq, + test_bit(FR_FINISHED, &req->flags)); restore_sigs(&oldset); - if (req->aborted) - goto aborted; - if (req->state == FUSE_REQ_FINISHED) + if (!err) return; + spin_lock(&fiq->waitq.lock); /* Request is not yet in userspace, bail out */ - if (req->state == FUSE_REQ_PENDING) { + if (test_bit(FR_PENDING, &req->flags)) { list_del(&req->list); + spin_unlock(&fiq->waitq.lock); __fuse_put_request(req); req->out.h.error = -EINTR; return; } + spin_unlock(&fiq->waitq.lock); } /* * Either request is already in userspace, or it was forced. * Wait it out. */ - spin_unlock(&fc->lock); - wait_event(req->waitq, req->state == FUSE_REQ_FINISHED); - spin_lock(&fc->lock); - - if (!req->aborted) - return; - - aborted: - BUG_ON(req->state != FUSE_REQ_FINISHED); - if (req->locked) { - /* This is uninterruptible sleep, because data is - being copied to/from the buffers of req. During - locked state, there mustn't be any filesystem - operation (e.g. page fault), since that could lead - to deadlock */ - spin_unlock(&fc->lock); - wait_event(req->waitq, !req->locked); - spin_lock(&fc->lock); - } + wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags)); } static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) { - BUG_ON(req->background); - spin_lock(&fc->lock); - if (!fc->connected) + struct fuse_iqueue *fiq = &fc->iq; + + BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); + spin_lock(&fiq->waitq.lock); + if (!fiq->connected) { + spin_unlock(&fiq->waitq.lock); req->out.h.error = -ENOTCONN; - else if (fc->conn_error) - req->out.h.error = -ECONNREFUSED; - else { - req->in.h.unique = fuse_get_unique(fc); - queue_request(fc, req); + } else { + req->in.h.unique = fuse_get_unique(fiq); + queue_request(fiq, req); /* acquire extra reference, since request is still needed after request_end() */ __fuse_get_request(req); + spin_unlock(&fiq->waitq.lock); request_wait_answer(fc, req); + /* Pairs with smp_wmb() in request_end() */ + smp_rmb(); } - spin_unlock(&fc->lock); } void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) { - req->isreply = 1; + __set_bit(FR_ISREPLY, &req->flags); + if (!test_bit(FR_WAITING, &req->flags)) { + __set_bit(FR_WAITING, &req->flags); + atomic_inc(&fc->num_waiting); + } __fuse_request_send(fc, req); } EXPORT_SYMBOL_GPL(fuse_request_send); @@ -586,10 +575,20 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) return ret; } -static void fuse_request_send_nowait_locked(struct fuse_conn *fc, - struct fuse_req *req) +/* + * Called under fc->lock + * + * fc->connected must have been checked previously + */ +void fuse_request_send_background_locked(struct fuse_conn *fc, + struct fuse_req *req) { - BUG_ON(!req->background); + BUG_ON(!test_bit(FR_BACKGROUND, &req->flags)); + if (!test_bit(FR_WAITING, &req->flags)) { + __set_bit(FR_WAITING, &req->flags); + atomic_inc(&fc->num_waiting); + } + __set_bit(FR_ISREPLY, &req->flags); fc->num_background++; if (fc->num_background == fc->max_background) fc->blocked = 1; @@ -602,54 +601,40 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, flush_bg_queue(fc); } -static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) +void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) { + BUG_ON(!req->end); spin_lock(&fc->lock); if (fc->connected) { - fuse_request_send_nowait_locked(fc, req); + fuse_request_send_background_locked(fc, req); spin_unlock(&fc->lock); } else { + spin_unlock(&fc->lock); req->out.h.error = -ENOTCONN; - request_end(fc, req); + req->end(fc, req); + fuse_put_request(fc, req); } } - -void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) -{ - req->isreply = 1; - fuse_request_send_nowait(fc, req); -} EXPORT_SYMBOL_GPL(fuse_request_send_background); static int fuse_request_send_notify_reply(struct fuse_conn *fc, struct fuse_req *req, u64 unique) { int err = -ENODEV; + struct fuse_iqueue *fiq = &fc->iq; - req->isreply = 0; + __clear_bit(FR_ISREPLY, &req->flags); req->in.h.unique = unique; - spin_lock(&fc->lock); - if (fc->connected) { - queue_request(fc, req); + spin_lock(&fiq->waitq.lock); + if (fiq->connected) { + queue_request(fiq, req); err = 0; } - spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); return err; } -/* - * Called under fc->lock - * - * fc->connected must have been checked previously - */ -void fuse_request_send_background_locked(struct fuse_conn *fc, - struct fuse_req *req) -{ - req->isreply = 1; - fuse_request_send_nowait_locked(fc, req); -} - void fuse_force_forget(struct file *file, u64 nodeid) { struct inode *inode = file_inode(file); @@ -665,7 +650,7 @@ void fuse_force_forget(struct file *file, u64 nodeid) req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; - req->isreply = 0; + __clear_bit(FR_ISREPLY, &req->flags); __fuse_request_send(fc, req); /* ignore errors */ fuse_put_request(fc, req); @@ -676,38 +661,39 @@ void fuse_force_forget(struct file *file, u64 nodeid) * anything that could cause a page-fault. If the request was already * aborted bail out. */ -static int lock_request(struct fuse_conn *fc, struct fuse_req *req) +static int lock_request(struct fuse_req *req) { int err = 0; if (req) { - spin_lock(&fc->lock); - if (req->aborted) + spin_lock(&req->waitq.lock); + if (test_bit(FR_ABORTED, &req->flags)) err = -ENOENT; else - req->locked = 1; - spin_unlock(&fc->lock); + set_bit(FR_LOCKED, &req->flags); + spin_unlock(&req->waitq.lock); } return err; } /* - * Unlock request. If it was aborted during being locked, the - * requester thread is currently waiting for it to be unlocked, so - * wake it up. + * Unlock request. If it was aborted while locked, caller is responsible + * for unlocking and ending the request. */ -static void unlock_request(struct fuse_conn *fc, struct fuse_req *req) +static int unlock_request(struct fuse_req *req) { + int err = 0; if (req) { - spin_lock(&fc->lock); - req->locked = 0; - if (req->aborted) - wake_up(&req->waitq); - spin_unlock(&fc->lock); + spin_lock(&req->waitq.lock); + if (test_bit(FR_ABORTED, &req->flags)) + err = -ENOENT; + else + clear_bit(FR_LOCKED, &req->flags); + spin_unlock(&req->waitq.lock); } + return err; } struct fuse_copy_state { - struct fuse_conn *fc; int write; struct fuse_req *req; struct iov_iter *iter; @@ -721,13 +707,10 @@ struct fuse_copy_state { unsigned move_pages:1; }; -static void fuse_copy_init(struct fuse_copy_state *cs, - struct fuse_conn *fc, - int write, +static void fuse_copy_init(struct fuse_copy_state *cs, int write, struct iov_iter *iter) { memset(cs, 0, sizeof(*cs)); - cs->fc = fc; cs->write = write; cs->iter = iter; } @@ -760,7 +743,10 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) struct page *page; int err; - unlock_request(cs->fc, cs->req); + err = unlock_request(cs->req); + if (err) + return err; + fuse_copy_finish(cs); if (cs->pipebufs) { struct pipe_buffer *buf = cs->pipebufs; @@ -809,7 +795,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) iov_iter_advance(cs->iter, err); } - return lock_request(cs->fc, cs->req); + return lock_request(cs->req); } /* Do as much copy to/from userspace buffer as we can */ @@ -860,7 +846,10 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) struct page *newpage; struct pipe_buffer *buf = cs->pipebufs; - unlock_request(cs->fc, cs->req); + err = unlock_request(cs->req); + if (err) + return err; + fuse_copy_finish(cs); err = buf->ops->confirm(cs->pipe, buf); @@ -914,12 +903,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) lru_cache_add_file(newpage); err = 0; - spin_lock(&cs->fc->lock); - if (cs->req->aborted) + spin_lock(&cs->req->waitq.lock); + if (test_bit(FR_ABORTED, &cs->req->flags)) err = -ENOENT; else *pagep = newpage; - spin_unlock(&cs->fc->lock); + spin_unlock(&cs->req->waitq.lock); if (err) { unlock_page(newpage); @@ -939,7 +928,7 @@ out_fallback: cs->pg = buf->page; cs->offset = buf->offset; - err = lock_request(cs->fc, cs->req); + err = lock_request(cs->req); if (err) return err; @@ -950,11 +939,15 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, unsigned offset, unsigned count) { struct pipe_buffer *buf; + int err; if (cs->nr_segs == cs->pipe->buffers) return -EIO; - unlock_request(cs->fc, cs->req); + err = unlock_request(cs->req); + if (err) + return err; + fuse_copy_finish(cs); buf = cs->pipebufs; @@ -1065,36 +1058,15 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, return err; } -static int forget_pending(struct fuse_conn *fc) +static int forget_pending(struct fuse_iqueue *fiq) { - return fc->forget_list_head.next != NULL; + return fiq->forget_list_head.next != NULL; } -static int request_pending(struct fuse_conn *fc) +static int request_pending(struct fuse_iqueue *fiq) { - return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) || - forget_pending(fc); -} - -/* Wait until a request is available on the pending list */ -static void request_wait(struct fuse_conn *fc) -__releases(fc->lock) -__acquires(fc->lock) -{ - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue_exclusive(&fc->waitq, &wait); - while (fc->connected && !request_pending(fc)) { - set_current_state(TASK_INTERRUPTIBLE); - if (signal_pending(current)) - break; - - spin_unlock(&fc->lock); - schedule(); - spin_lock(&fc->lock); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&fc->waitq, &wait); + return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) || + forget_pending(fiq); } /* @@ -1103,11 +1075,12 @@ __acquires(fc->lock) * Unlike other requests this is assembled on demand, without a need * to allocate a separate fuse_req structure. * - * Called with fc->lock held, releases it + * Called with fiq->waitq.lock held, releases it */ -static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs, +static int fuse_read_interrupt(struct fuse_iqueue *fiq, + struct fuse_copy_state *cs, size_t nbytes, struct fuse_req *req) -__releases(fc->lock) +__releases(fiq->waitq.lock) { struct fuse_in_header ih; struct fuse_interrupt_in arg; @@ -1115,7 +1088,7 @@ __releases(fc->lock) int err; list_del_init(&req->intr_entry); - req->intr_unique = fuse_get_unique(fc); + req->intr_unique = fuse_get_unique(fiq); memset(&ih, 0, sizeof(ih)); memset(&arg, 0, sizeof(arg)); ih.len = reqsize; @@ -1123,7 +1096,7 @@ __releases(fc->lock) ih.unique = req->intr_unique; arg.unique = req->in.h.unique; - spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); if (nbytes < reqsize) return -EINVAL; @@ -1135,21 +1108,21 @@ __releases(fc->lock) return err ? err : reqsize; } -static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc, +static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq, unsigned max, unsigned *countp) { - struct fuse_forget_link *head = fc->forget_list_head.next; + struct fuse_forget_link *head = fiq->forget_list_head.next; struct fuse_forget_link **newhead = &head; unsigned count; for (count = 0; *newhead != NULL && count < max; count++) newhead = &(*newhead)->next; - fc->forget_list_head.next = *newhead; + fiq->forget_list_head.next = *newhead; *newhead = NULL; - if (fc->forget_list_head.next == NULL) - fc->forget_list_tail = &fc->forget_list_head; + if (fiq->forget_list_head.next == NULL) + fiq->forget_list_tail = &fiq->forget_list_head; if (countp != NULL) *countp = count; @@ -1157,24 +1130,24 @@ static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc, return head; } -static int fuse_read_single_forget(struct fuse_conn *fc, +static int fuse_read_single_forget(struct fuse_iqueue *fiq, struct fuse_copy_state *cs, size_t nbytes) -__releases(fc->lock) +__releases(fiq->waitq.lock) { int err; - struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL); + struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL); struct fuse_forget_in arg = { .nlookup = forget->forget_one.nlookup, }; struct fuse_in_header ih = { .opcode = FUSE_FORGET, .nodeid = forget->forget_one.nodeid, - .unique = fuse_get_unique(fc), + .unique = fuse_get_unique(fiq), .len = sizeof(ih) + sizeof(arg), }; - spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); kfree(forget); if (nbytes < ih.len) return -EINVAL; @@ -1190,9 +1163,9 @@ __releases(fc->lock) return ih.len; } -static int fuse_read_batch_forget(struct fuse_conn *fc, +static int fuse_read_batch_forget(struct fuse_iqueue *fiq, struct fuse_copy_state *cs, size_t nbytes) -__releases(fc->lock) +__releases(fiq->waitq.lock) { int err; unsigned max_forgets; @@ -1201,18 +1174,18 @@ __releases(fc->lock) struct fuse_batch_forget_in arg = { .count = 0 }; struct fuse_in_header ih = { .opcode = FUSE_BATCH_FORGET, - .unique = fuse_get_unique(fc), + .unique = fuse_get_unique(fiq), .len = sizeof(ih) + sizeof(arg), }; if (nbytes < ih.len) { - spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); return -EINVAL; } max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); - head = dequeue_forget(fc, max_forgets, &count); - spin_unlock(&fc->lock); + head = dequeue_forget(fiq, max_forgets, &count); + spin_unlock(&fiq->waitq.lock); arg.count = count; ih.len += count * sizeof(struct fuse_forget_one); @@ -1239,14 +1212,15 @@ __releases(fc->lock) return ih.len; } -static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs, +static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq, + struct fuse_copy_state *cs, size_t nbytes) -__releases(fc->lock) +__releases(fiq->waitq.lock) { - if (fc->minor < 16 || fc->forget_list_head.next->next == NULL) - return fuse_read_single_forget(fc, cs, nbytes); + if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL) + return fuse_read_single_forget(fiq, cs, nbytes); else - return fuse_read_batch_forget(fc, cs, nbytes); + return fuse_read_batch_forget(fiq, cs, nbytes); } /* @@ -1258,46 +1232,51 @@ __releases(fc->lock) * request_end(). Otherwise add it to the processing list, and set * the 'sent' flag. */ -static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, +static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, struct fuse_copy_state *cs, size_t nbytes) { - int err; + ssize_t err; + struct fuse_conn *fc = fud->fc; + struct fuse_iqueue *fiq = &fc->iq; + struct fuse_pqueue *fpq = &fud->pq; struct fuse_req *req; struct fuse_in *in; unsigned reqsize; restart: - spin_lock(&fc->lock); + spin_lock(&fiq->waitq.lock); err = -EAGAIN; - if ((file->f_flags & O_NONBLOCK) && fc->connected && - !request_pending(fc)) + if ((file->f_flags & O_NONBLOCK) && fiq->connected && + !request_pending(fiq)) goto err_unlock; - request_wait(fc); - err = -ENODEV; - if (!fc->connected) + err = wait_event_interruptible_exclusive_locked(fiq->waitq, + !fiq->connected || request_pending(fiq)); + if (err) goto err_unlock; - err = -ERESTARTSYS; - if (!request_pending(fc)) + + err = -ENODEV; + if (!fiq->connected) goto err_unlock; - if (!list_empty(&fc->interrupts)) { - req = list_entry(fc->interrupts.next, struct fuse_req, + if (!list_empty(&fiq->interrupts)) { + req = list_entry(fiq->interrupts.next, struct fuse_req, intr_entry); - return fuse_read_interrupt(fc, cs, nbytes, req); + return fuse_read_interrupt(fiq, cs, nbytes, req); } - if (forget_pending(fc)) { - if (list_empty(&fc->pending) || fc->forget_batch-- > 0) - return fuse_read_forget(fc, cs, nbytes); + if (forget_pending(fiq)) { + if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0) + return fuse_read_forget(fc, fiq, cs, nbytes); - if (fc->forget_batch <= -8) - fc->forget_batch = 16; + if (fiq->forget_batch <= -8) + fiq->forget_batch = 16; } - req = list_entry(fc->pending.next, struct fuse_req, list); - req->state = FUSE_REQ_READING; - list_move(&req->list, &fc->io); + req = list_entry(fiq->pending.next, struct fuse_req, list); + clear_bit(FR_PENDING, &req->flags); + list_del_init(&req->list); + spin_unlock(&fiq->waitq.lock); in = &req->in; reqsize = in->h.len; @@ -1310,37 +1289,48 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, request_end(fc, req); goto restart; } - spin_unlock(&fc->lock); + spin_lock(&fpq->lock); + list_add(&req->list, &fpq->io); + spin_unlock(&fpq->lock); cs->req = req; err = fuse_copy_one(cs, &in->h, sizeof(in->h)); if (!err) err = fuse_copy_args(cs, in->numargs, in->argpages, (struct fuse_arg *) in->args, 0); fuse_copy_finish(cs); - spin_lock(&fc->lock); - req->locked = 0; - if (req->aborted) { - request_end(fc, req); - return -ENODEV; + spin_lock(&fpq->lock); + clear_bit(FR_LOCKED, &req->flags); + if (!fpq->connected) { + err = -ENODEV; + goto out_end; } if (err) { req->out.h.error = -EIO; - request_end(fc, req); - return err; + goto out_end; } - if (!req->isreply) - request_end(fc, req); - else { - req->state = FUSE_REQ_SENT; - list_move_tail(&req->list, &fc->processing); - if (req->interrupted) - queue_interrupt(fc, req); - spin_unlock(&fc->lock); + if (!test_bit(FR_ISREPLY, &req->flags)) { + err = reqsize; + goto out_end; } + list_move_tail(&req->list, &fpq->processing); + spin_unlock(&fpq->lock); + set_bit(FR_SENT, &req->flags); + /* matches barrier in request_wait_answer() */ + smp_mb__after_atomic(); + if (test_bit(FR_INTERRUPTED, &req->flags)) + queue_interrupt(fiq, req); + return reqsize; +out_end: + if (!test_bit(FR_PRIVATE, &req->flags)) + list_del_init(&req->list); + spin_unlock(&fpq->lock); + request_end(fc, req); + return err; + err_unlock: - spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); return err; } @@ -1359,16 +1349,17 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to) { struct fuse_copy_state cs; struct file *file = iocb->ki_filp; - struct fuse_conn *fc = fuse_get_conn(file); - if (!fc) + struct fuse_dev *fud = fuse_get_dev(file); + + if (!fud) return -EPERM; if (!iter_is_iovec(to)) return -EINVAL; - fuse_copy_init(&cs, fc, 1, to); + fuse_copy_init(&cs, 1, to); - return fuse_dev_do_read(fc, file, &cs, iov_iter_count(to)); + return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to)); } static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, @@ -1380,18 +1371,19 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, int do_wakeup = 0; struct pipe_buffer *bufs; struct fuse_copy_state cs; - struct fuse_conn *fc = fuse_get_conn(in); - if (!fc) + struct fuse_dev *fud = fuse_get_dev(in); + + if (!fud) return -EPERM; bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL); if (!bufs) return -ENOMEM; - fuse_copy_init(&cs, fc, 1, NULL); + fuse_copy_init(&cs, 1, NULL); cs.pipebufs = bufs; cs.pipe = pipe; - ret = fuse_dev_do_read(fc, in, &cs, len); + ret = fuse_dev_do_read(fud, in, &cs, len); if (ret < 0) goto out; @@ -1830,11 +1822,11 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, } /* Look up request on processing list by unique ID */ -static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique) +static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique) { struct fuse_req *req; - list_for_each_entry(req, &fc->processing, list) { + list_for_each_entry(req, &fpq->processing, list) { if (req->in.h.unique == unique || req->intr_unique == unique) return req; } @@ -1871,10 +1863,12 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, * it from the list and copy the rest of the buffer to the request. * The request is finished by calling request_end() */ -static ssize_t fuse_dev_do_write(struct fuse_conn *fc, +static ssize_t fuse_dev_do_write(struct fuse_dev *fud, struct fuse_copy_state *cs, size_t nbytes) { int err; + struct fuse_conn *fc = fud->fc; + struct fuse_pqueue *fpq = &fud->pq; struct fuse_req *req; struct fuse_out_header oh; @@ -1902,63 +1896,60 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, if (oh.error <= -1000 || oh.error > 0) goto err_finish; - spin_lock(&fc->lock); + spin_lock(&fpq->lock); err = -ENOENT; - if (!fc->connected) - goto err_unlock; + if (!fpq->connected) + goto err_unlock_pq; - req = request_find(fc, oh.unique); + req = request_find(fpq, oh.unique); if (!req) - goto err_unlock; + goto err_unlock_pq; - if (req->aborted) { - spin_unlock(&fc->lock); - fuse_copy_finish(cs); - spin_lock(&fc->lock); - request_end(fc, req); - return -ENOENT; - } /* Is it an interrupt reply? */ if (req->intr_unique == oh.unique) { + spin_unlock(&fpq->lock); + err = -EINVAL; if (nbytes != sizeof(struct fuse_out_header)) - goto err_unlock; + goto err_finish; if (oh.error == -ENOSYS) fc->no_interrupt = 1; else if (oh.error == -EAGAIN) - queue_interrupt(fc, req); + queue_interrupt(&fc->iq, req); - spin_unlock(&fc->lock); fuse_copy_finish(cs); return nbytes; } - req->state = FUSE_REQ_WRITING; - list_move(&req->list, &fc->io); + clear_bit(FR_SENT, &req->flags); + list_move(&req->list, &fpq->io); req->out.h = oh; - req->locked = 1; + set_bit(FR_LOCKED, &req->flags); + spin_unlock(&fpq->lock); cs->req = req; if (!req->out.page_replace) cs->move_pages = 0; - spin_unlock(&fc->lock); err = copy_out_args(cs, &req->out, nbytes); fuse_copy_finish(cs); - spin_lock(&fc->lock); - req->locked = 0; - if (!err) { - if (req->aborted) - err = -ENOENT; - } else if (!req->aborted) + spin_lock(&fpq->lock); + clear_bit(FR_LOCKED, &req->flags); + if (!fpq->connected) + err = -ENOENT; + else if (err) req->out.h.error = -EIO; + if (!test_bit(FR_PRIVATE, &req->flags)) + list_del_init(&req->list); + spin_unlock(&fpq->lock); + request_end(fc, req); return err ? err : nbytes; - err_unlock: - spin_unlock(&fc->lock); + err_unlock_pq: + spin_unlock(&fpq->lock); err_finish: fuse_copy_finish(cs); return err; @@ -1967,16 +1958,17 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from) { struct fuse_copy_state cs; - struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp); - if (!fc) + struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp); + + if (!fud) return -EPERM; if (!iter_is_iovec(from)) return -EINVAL; - fuse_copy_init(&cs, fc, 0, from); + fuse_copy_init(&cs, 0, from); - return fuse_dev_do_write(fc, &cs, iov_iter_count(from)); + return fuse_dev_do_write(fud, &cs, iov_iter_count(from)); } static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, @@ -1987,12 +1979,12 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, unsigned idx; struct pipe_buffer *bufs; struct fuse_copy_state cs; - struct fuse_conn *fc; + struct fuse_dev *fud; size_t rem; ssize_t ret; - fc = fuse_get_conn(out); - if (!fc) + fud = fuse_get_dev(out); + if (!fud) return -EPERM; bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL); @@ -2039,7 +2031,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, } pipe_unlock(pipe); - fuse_copy_init(&cs, fc, 0, NULL); + fuse_copy_init(&cs, 0, NULL); cs.pipebufs = bufs; cs.nr_segs = nbuf; cs.pipe = pipe; @@ -2047,7 +2039,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, if (flags & SPLICE_F_MOVE) cs.move_pages = 1; - ret = fuse_dev_do_write(fc, &cs, len); + ret = fuse_dev_do_write(fud, &cs, len); for (idx = 0; idx < nbuf; idx++) { struct pipe_buffer *buf = &bufs[idx]; @@ -2061,18 +2053,21 @@ out: static unsigned fuse_dev_poll(struct file *file, poll_table *wait) { unsigned mask = POLLOUT | POLLWRNORM; - struct fuse_conn *fc = fuse_get_conn(file); - if (!fc) + struct fuse_iqueue *fiq; + struct fuse_dev *fud = fuse_get_dev(file); + + if (!fud) return POLLERR; - poll_wait(file, &fc->waitq, wait); + fiq = &fud->fc->iq; + poll_wait(file, &fiq->waitq, wait); - spin_lock(&fc->lock); - if (!fc->connected) + spin_lock(&fiq->waitq.lock); + if (!fiq->connected) mask = POLLERR; - else if (request_pending(fc)) + else if (request_pending(fiq)) mask |= POLLIN | POLLRDNORM; - spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); return mask; } @@ -2083,67 +2078,18 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) * This function releases and reacquires fc->lock */ static void end_requests(struct fuse_conn *fc, struct list_head *head) -__releases(fc->lock) -__acquires(fc->lock) { while (!list_empty(head)) { struct fuse_req *req; req = list_entry(head->next, struct fuse_req, list); req->out.h.error = -ECONNABORTED; - request_end(fc, req); - spin_lock(&fc->lock); - } -} - -/* - * Abort requests under I/O - * - * The requests are set to aborted and finished, and the request - * waiter is woken up. This will make request_wait_answer() wait - * until the request is unlocked and then return. - * - * If the request is asynchronous, then the end function needs to be - * called after waiting for the request to be unlocked (if it was - * locked). - */ -static void end_io_requests(struct fuse_conn *fc) -__releases(fc->lock) -__acquires(fc->lock) -{ - while (!list_empty(&fc->io)) { - struct fuse_req *req = - list_entry(fc->io.next, struct fuse_req, list); - void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; - - req->aborted = 1; - req->out.h.error = -ECONNABORTED; - req->state = FUSE_REQ_FINISHED; + clear_bit(FR_PENDING, &req->flags); + clear_bit(FR_SENT, &req->flags); list_del_init(&req->list); - wake_up(&req->waitq); - if (end) { - req->end = NULL; - __fuse_get_request(req); - spin_unlock(&fc->lock); - wait_event(req->waitq, !req->locked); - end(fc, req); - fuse_put_request(fc, req); - spin_lock(&fc->lock); - } + request_end(fc, req); } } -static void end_queued_requests(struct fuse_conn *fc) -__releases(fc->lock) -__acquires(fc->lock) -{ - fc->max_background = UINT_MAX; - flush_bg_queue(fc); - end_requests(fc, &fc->pending); - end_requests(fc, &fc->processing); - while (forget_pending(fc)) - kfree(dequeue_forget(fc, 1, NULL)); -} - static void end_polls(struct fuse_conn *fc) { struct rb_node *p; @@ -2162,67 +2108,156 @@ static void end_polls(struct fuse_conn *fc) /* * Abort all requests. * - * Emergency exit in case of a malicious or accidental deadlock, or - * just a hung filesystem. + * Emergency exit in case of a malicious or accidental deadlock, or just a hung + * filesystem. * - * The same effect is usually achievable through killing the - * filesystem daemon and all users of the filesystem. The exception - * is the combination of an asynchronous request and the tricky - * deadlock (see Documentation/filesystems/fuse.txt). + * The same effect is usually achievable through killing the filesystem daemon + * and all users of the filesystem. The exception is the combination of an + * asynchronous request and the tricky deadlock (see + * Documentation/filesystems/fuse.txt). * - * During the aborting, progression of requests from the pending and - * processing lists onto the io list, and progression of new requests - * onto the pending list is prevented by req->connected being false. - * - * Progression of requests under I/O to the processing list is - * prevented by the req->aborted flag being true for these requests. - * For this reason requests on the io list must be aborted first. + * Aborting requests under I/O goes as follows: 1: Separate out unlocked + * requests, they should be finished off immediately. Locked requests will be + * finished after unlock; see unlock_request(). 2: Finish off the unlocked + * requests. It is possible that some request will finish before we can. This + * is OK, the request will in that case be removed from the list before we touch + * it. */ void fuse_abort_conn(struct fuse_conn *fc) { + struct fuse_iqueue *fiq = &fc->iq; + spin_lock(&fc->lock); if (fc->connected) { + struct fuse_dev *fud; + struct fuse_req *req, *next; + LIST_HEAD(to_end1); + LIST_HEAD(to_end2); + fc->connected = 0; fc->blocked = 0; fuse_set_initialized(fc); - end_io_requests(fc); - end_queued_requests(fc); + list_for_each_entry(fud, &fc->devices, entry) { + struct fuse_pqueue *fpq = &fud->pq; + + spin_lock(&fpq->lock); + fpq->connected = 0; + list_for_each_entry_safe(req, next, &fpq->io, list) { + req->out.h.error = -ECONNABORTED; + spin_lock(&req->waitq.lock); + set_bit(FR_ABORTED, &req->flags); + if (!test_bit(FR_LOCKED, &req->flags)) { + set_bit(FR_PRIVATE, &req->flags); + list_move(&req->list, &to_end1); + } + spin_unlock(&req->waitq.lock); + } + list_splice_init(&fpq->processing, &to_end2); + spin_unlock(&fpq->lock); + } + fc->max_background = UINT_MAX; + flush_bg_queue(fc); + + spin_lock(&fiq->waitq.lock); + fiq->connected = 0; + list_splice_init(&fiq->pending, &to_end2); + while (forget_pending(fiq)) + kfree(dequeue_forget(fiq, 1, NULL)); + wake_up_all_locked(&fiq->waitq); + spin_unlock(&fiq->waitq.lock); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); end_polls(fc); - wake_up_all(&fc->waitq); wake_up_all(&fc->blocked_waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); + spin_unlock(&fc->lock); + + while (!list_empty(&to_end1)) { + req = list_first_entry(&to_end1, struct fuse_req, list); + __fuse_get_request(req); + list_del_init(&req->list); + request_end(fc, req); + } + end_requests(fc, &to_end2); + } else { + spin_unlock(&fc->lock); } - spin_unlock(&fc->lock); } EXPORT_SYMBOL_GPL(fuse_abort_conn); int fuse_dev_release(struct inode *inode, struct file *file) { - struct fuse_conn *fc = fuse_get_conn(file); - if (fc) { - spin_lock(&fc->lock); - fc->connected = 0; - fc->blocked = 0; - fuse_set_initialized(fc); - end_queued_requests(fc); - end_polls(fc); - wake_up_all(&fc->blocked_waitq); - spin_unlock(&fc->lock); - fuse_conn_put(fc); - } + struct fuse_dev *fud = fuse_get_dev(file); + if (fud) { + struct fuse_conn *fc = fud->fc; + struct fuse_pqueue *fpq = &fud->pq; + + WARN_ON(!list_empty(&fpq->io)); + end_requests(fc, &fpq->processing); + /* Are we the last open device? */ + if (atomic_dec_and_test(&fc->dev_count)) { + WARN_ON(fc->iq.fasync != NULL); + fuse_abort_conn(fc); + } + fuse_dev_free(fud); + } return 0; } EXPORT_SYMBOL_GPL(fuse_dev_release); static int fuse_dev_fasync(int fd, struct file *file, int on) { - struct fuse_conn *fc = fuse_get_conn(file); - if (!fc) + struct fuse_dev *fud = fuse_get_dev(file); + + if (!fud) return -EPERM; /* No locking - fasync_helper does its own locking */ - return fasync_helper(fd, file, on, &fc->fasync); + return fasync_helper(fd, file, on, &fud->fc->iq.fasync); +} + +static int fuse_device_clone(struct fuse_conn *fc, struct file *new) +{ + struct fuse_dev *fud; + + if (new->private_data) + return -EINVAL; + + fud = fuse_dev_alloc(fc); + if (!fud) + return -ENOMEM; + + new->private_data = fud; + atomic_inc(&fc->dev_count); + + return 0; +} + +static long fuse_dev_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int err = -ENOTTY; + + if (cmd == FUSE_DEV_IOC_CLONE) { + int oldfd; + + err = -EFAULT; + if (!get_user(oldfd, (__u32 __user *) arg)) { + struct file *old = fget(oldfd); + + err = -EINVAL; + if (old) { + struct fuse_dev *fud = fuse_get_dev(old); + + if (fud) { + mutex_lock(&fuse_mutex); + err = fuse_device_clone(fud->fc, file); + mutex_unlock(&fuse_mutex); + } + fput(old); + } + } + } + return err; } const struct file_operations fuse_dev_operations = { @@ -2236,6 +2271,8 @@ const struct file_operations fuse_dev_operations = { .poll = fuse_dev_poll, .release = fuse_dev_release, .fasync = fuse_dev_fasync, + .unlocked_ioctl = fuse_dev_ioctl, + .compat_ioctl = fuse_dev_ioctl, }; EXPORT_SYMBOL_GPL(fuse_dev_operations); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8c5e2fa68835..014fa8ba2b51 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -96,17 +96,17 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) * Drop the release request when client does not * implement 'open' */ - req->background = 0; + __clear_bit(FR_BACKGROUND, &req->flags); iput(req->misc.release.inode); fuse_put_request(ff->fc, req); } else if (sync) { - req->background = 0; + __clear_bit(FR_BACKGROUND, &req->flags); fuse_request_send(ff->fc, req); iput(req->misc.release.inode); fuse_put_request(ff->fc, req); } else { req->end = fuse_release_end; - req->background = 1; + __set_bit(FR_BACKGROUND, &req->flags); fuse_request_send_background(ff->fc, req); } kfree(ff); @@ -299,8 +299,8 @@ void fuse_sync_release(struct fuse_file *ff, int flags) { WARN_ON(atomic_read(&ff->count) > 1); fuse_prepare_release(ff, flags, FUSE_RELEASE); - ff->reserved_req->force = 1; - ff->reserved_req->background = 0; + __set_bit(FR_FORCE, &ff->reserved_req->flags); + __clear_bit(FR_BACKGROUND, &ff->reserved_req->flags); fuse_request_send(ff->fc, ff->reserved_req); fuse_put_request(ff->fc, ff->reserved_req); kfree(ff); @@ -426,7 +426,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; - req->force = 1; + __set_bit(FR_FORCE, &req->flags); fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); @@ -1611,7 +1611,8 @@ static int fuse_writepage_locked(struct page *page) if (!req) goto err; - req->background = 1; /* writeback always goes to bg_queue */ + /* writeback always goes to bg_queue */ + __set_bit(FR_BACKGROUND, &req->flags); tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); if (!tmp_page) goto err_free; @@ -1742,8 +1743,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, } } - if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT || - old_req->state == FUSE_REQ_PENDING)) { + if (old_req->num_pages == 1 && test_bit(FR_PENDING, &old_req->flags)) { struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host); copy_highpage(old_req->pages[0], page); @@ -1830,7 +1830,7 @@ static int fuse_writepages_fill(struct page *page, req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; req->misc.write.next = NULL; req->in.argpages = 1; - req->background = 1; + __set_bit(FR_BACKGROUND, &req->flags); req->num_pages = 0; req->end = fuse_writepage_end; req->inode = inode; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7354dc142a50..405113101db8 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -241,16 +241,6 @@ struct fuse_args { #define FUSE_ARGS(args) struct fuse_args args = {} -/** The request state */ -enum fuse_req_state { - FUSE_REQ_INIT = 0, - FUSE_REQ_PENDING, - FUSE_REQ_READING, - FUSE_REQ_SENT, - FUSE_REQ_WRITING, - FUSE_REQ_FINISHED -}; - /** The request IO state (for asynchronous processing) */ struct fuse_io_priv { int async; @@ -267,7 +257,40 @@ struct fuse_io_priv { }; /** + * Request flags + * + * FR_ISREPLY: set if the request has reply + * FR_FORCE: force sending of the request even if interrupted + * FR_BACKGROUND: request is sent in the background + * FR_WAITING: request is counted as "waiting" + * FR_ABORTED: the request was aborted + * FR_INTERRUPTED: the request has been interrupted + * FR_LOCKED: data is being copied to/from the request + * FR_PENDING: request is not yet in userspace + * FR_SENT: request is in userspace, waiting for an answer + * FR_FINISHED: request is finished + * FR_PRIVATE: request is on private list + */ +enum fuse_req_flag { + FR_ISREPLY, + FR_FORCE, + FR_BACKGROUND, + FR_WAITING, + FR_ABORTED, + FR_INTERRUPTED, + FR_LOCKED, + FR_PENDING, + FR_SENT, + FR_FINISHED, + FR_PRIVATE, +}; + +/** * A request to the client + * + * .waitq.lock protects the following fields: + * - FR_ABORTED + * - FR_LOCKED (may also be modified under fc->lock, tested under both) */ struct fuse_req { /** This can be on either pending processing or io lists in @@ -283,35 +306,8 @@ struct fuse_req { /** Unique ID for the interrupt request */ u64 intr_unique; - /* - * The following bitfields are either set once before the - * request is queued or setting/clearing them is protected by - * fuse_conn->lock - */ - - /** True if the request has reply */ - unsigned isreply:1; - - /** Force sending of the request even if interrupted */ - unsigned force:1; - - /** The request was aborted */ - unsigned aborted:1; - - /** Request is sent in the background */ - unsigned background:1; - - /** The request has been interrupted */ - unsigned interrupted:1; - - /** Data is being copied to/from the request */ - unsigned locked:1; - - /** Request is counted as "waiting" */ - unsigned waiting:1; - - /** State of the request */ - enum fuse_req_state state; + /* Request flags, updated with test/set/clear_bit() */ + unsigned long flags; /** The request input */ struct fuse_in in; @@ -380,6 +376,61 @@ struct fuse_req { struct file *stolen_file; }; +struct fuse_iqueue { + /** Connection established */ + unsigned connected; + + /** Readers of the connection are waiting on this */ + wait_queue_head_t waitq; + + /** The next unique request id */ + u64 reqctr; + + /** The list of pending requests */ + struct list_head pending; + + /** Pending interrupts */ + struct list_head interrupts; + + /** Queue of pending forgets */ + struct fuse_forget_link forget_list_head; + struct fuse_forget_link *forget_list_tail; + + /** Batching of FORGET requests (positive indicates FORGET batch) */ + int forget_batch; + + /** O_ASYNC requests */ + struct fasync_struct *fasync; +}; + +struct fuse_pqueue { + /** Connection established */ + unsigned connected; + + /** Lock protecting accessess to members of this structure */ + spinlock_t lock; + + /** The list of requests being processed */ + struct list_head processing; + + /** The list of requests under I/O */ + struct list_head io; +}; + +/** + * Fuse device instance + */ +struct fuse_dev { + /** Fuse connection for this device */ + struct fuse_conn *fc; + + /** Processing queue */ + struct fuse_pqueue pq; + + /** list entry on fc->devices */ + struct list_head entry; +}; + /** * A Fuse connection. * @@ -394,6 +445,9 @@ struct fuse_conn { /** Refcount */ atomic_t count; + /** Number of fuse_dev's */ + atomic_t dev_count; + struct rcu_head rcu; /** The user id for this mount */ @@ -411,17 +465,8 @@ struct fuse_conn { /** Maximum write size */ unsigned max_write; - /** Readers of the connection are waiting on this */ - wait_queue_head_t waitq; - - /** The list of pending requests */ - struct list_head pending; - - /** The list of requests being processed */ - struct list_head processing; - - /** The list of requests under I/O */ - struct list_head io; + /** Input queue */ + struct fuse_iqueue iq; /** The next unique kernel file handle */ u64 khctr; @@ -444,16 +489,6 @@ struct fuse_conn { /** The list of background requests set aside for later queuing */ struct list_head bg_queue; - /** Pending interrupts */ - struct list_head interrupts; - - /** Queue of pending forgets */ - struct fuse_forget_link forget_list_head; - struct fuse_forget_link *forget_list_tail; - - /** Batching of FORGET requests (positive indicates FORGET batch) */ - int forget_batch; - /** Flag indicating that INIT reply has been received. Allocating * any fuse request will be suspended until the flag is set */ int initialized; @@ -469,9 +504,6 @@ struct fuse_conn { /** waitq for reserved requests */ wait_queue_head_t reserved_req_waitq; - /** The next unique request id */ - u64 reqctr; - /** Connection established, cleared on umount, connection abort and device release */ unsigned connected; @@ -594,9 +626,6 @@ struct fuse_conn { /** number of dentries used in the above array */ int ctl_ndents; - /** O_ASYNC requests */ - struct fasync_struct *fasync; - /** Key for lock owner ID scrambling */ u32 scramble_key[4]; @@ -614,6 +643,9 @@ struct fuse_conn { /** Read/write semaphore to hold when accessing sb. */ struct rw_semaphore killsb; + + /** List of device instances belonging to this connection */ + struct list_head devices; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -826,6 +858,9 @@ void fuse_conn_init(struct fuse_conn *fc); */ void fuse_conn_put(struct fuse_conn *fc); +struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc); +void fuse_dev_free(struct fuse_dev *fud); + /** * Add connection to control filesystem */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 082ac1c97f39..ac81f48ab2f4 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -362,8 +362,8 @@ static void fuse_send_destroy(struct fuse_conn *fc) if (req && fc->conn_init) { fc->destroy_req = NULL; req->in.h.opcode = FUSE_DESTROY; - req->force = 1; - req->background = 0; + __set_bit(FR_FORCE, &req->flags); + __clear_bit(FR_BACKGROUND, &req->flags); fuse_request_send(fc, req); fuse_put_request(fc, req); } @@ -567,30 +567,46 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) return 0; } +static void fuse_iqueue_init(struct fuse_iqueue *fiq) +{ + memset(fiq, 0, sizeof(struct fuse_iqueue)); + init_waitqueue_head(&fiq->waitq); + INIT_LIST_HEAD(&fiq->pending); + INIT_LIST_HEAD(&fiq->interrupts); + fiq->forget_list_tail = &fiq->forget_list_head; + fiq->connected = 1; +} + +static void fuse_pqueue_init(struct fuse_pqueue *fpq) +{ + memset(fpq, 0, sizeof(struct fuse_pqueue)); + spin_lock_init(&fpq->lock); + INIT_LIST_HEAD(&fpq->processing); + INIT_LIST_HEAD(&fpq->io); + fpq->connected = 1; +} + void fuse_conn_init(struct fuse_conn *fc) { memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); init_rwsem(&fc->killsb); atomic_set(&fc->count, 1); - init_waitqueue_head(&fc->waitq); + atomic_set(&fc->dev_count, 1); init_waitqueue_head(&fc->blocked_waitq); init_waitqueue_head(&fc->reserved_req_waitq); - INIT_LIST_HEAD(&fc->pending); - INIT_LIST_HEAD(&fc->processing); - INIT_LIST_HEAD(&fc->io); - INIT_LIST_HEAD(&fc->interrupts); + fuse_iqueue_init(&fc->iq); INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); - fc->forget_list_tail = &fc->forget_list_head; + INIT_LIST_HEAD(&fc->devices); atomic_set(&fc->num_waiting, 0); fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; fc->khctr = 0; fc->polled_files = RB_ROOT; - fc->reqctr = 0; fc->blocked = 0; fc->initialized = 0; + fc->connected = 1; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); } @@ -930,6 +946,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) static void fuse_free_conn(struct fuse_conn *fc) { + WARN_ON(!list_empty(&fc->devices)); kfree_rcu(fc, rcu); } @@ -975,8 +992,42 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) return 0; } +struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc) +{ + struct fuse_dev *fud; + + fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL); + if (fud) { + fud->fc = fuse_conn_get(fc); + fuse_pqueue_init(&fud->pq); + + spin_lock(&fc->lock); + list_add_tail(&fud->entry, &fc->devices); + spin_unlock(&fc->lock); + } + + return fud; +} +EXPORT_SYMBOL_GPL(fuse_dev_alloc); + +void fuse_dev_free(struct fuse_dev *fud) +{ + struct fuse_conn *fc = fud->fc; + + if (fc) { + spin_lock(&fc->lock); + list_del(&fud->entry); + spin_unlock(&fc->lock); + + fuse_conn_put(fc); + } + kfree(fud); +} +EXPORT_SYMBOL_GPL(fuse_dev_free); + static int fuse_fill_super(struct super_block *sb, void *data, int silent) { + struct fuse_dev *fud; struct fuse_conn *fc; struct inode *root; struct fuse_mount_data d; @@ -1026,12 +1077,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) goto err_fput; fuse_conn_init(fc); + fc->release = fuse_free_conn; + + fud = fuse_dev_alloc(fc); + if (!fud) + goto err_put_conn; fc->dev = sb->s_dev; fc->sb = sb; err = fuse_bdi_init(fc, sb); if (err) - goto err_put_conn; + goto err_dev_free; sb->s_bdi = &fc->bdi; @@ -1040,7 +1096,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->dont_mask = 1; sb->s_flags |= MS_POSIXACL; - fc->release = fuse_free_conn; fc->flags = d.flags; fc->user_id = d.user_id; fc->group_id = d.group_id; @@ -1053,14 +1108,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) root = fuse_get_root_inode(sb, d.rootmode); root_dentry = d_make_root(root); if (!root_dentry) - goto err_put_conn; + goto err_dev_free; /* only now - we want root dentry with NULL ->d_op */ sb->s_d_op = &fuse_dentry_operations; init_req = fuse_request_alloc(0); if (!init_req) goto err_put_root; - init_req->background = 1; + __set_bit(FR_BACKGROUND, &init_req->flags); if (is_bdev) { fc->destroy_req = fuse_request_alloc(0); @@ -1079,8 +1134,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) list_add_tail(&fc->entry, &fuse_conn_list); sb->s_root = root_dentry; - fc->connected = 1; - file->private_data = fuse_conn_get(fc); + file->private_data = fud; mutex_unlock(&fuse_mutex); /* * atomic_dec_and_test() in fput() provides the necessary @@ -1099,6 +1153,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fuse_request_free(init_req); err_put_root: dput(root_dentry); + err_dev_free: + fuse_dev_free(fud); err_put_conn: fuse_bdi_destroy(fc); fuse_conn_put(fc); diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 8d129bb7355a..682529c00996 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -458,7 +458,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) * pg_authenticate method for nfsv4 callback threads. * * The authflavor has been negotiated, so an incorrect flavor is a server - * bug. Drop packets with incorrect authflavor. + * bug. Deny packets with incorrect authflavor. * * All other checking done after NFS decoding where the nfs_client can be * found in nfs4_callback_compound @@ -468,12 +468,12 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp) switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: if (rqstp->rq_proc != CB_NULL) - return SVC_DROP; + return SVC_DENIED; break; case RPC_AUTH_GSS: /* No RPC_AUTH_GSS support yet in NFSv4.1 */ if (svc_is_backchannel(rqstp)) - return SVC_DROP; + return SVC_DENIED; } return SVC_OK; } diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 197806fb87ff..29e3c1b011b7 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -327,10 +327,8 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr); /* Normal */ - if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { - slot->seq_nr++; + if (likely(args->csa_sequenceid == slot->seq_nr + 1)) goto out_ok; - } /* Replay */ if (args->csa_sequenceid == slot->seq_nr) { @@ -418,6 +416,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_process_state *cps) { struct nfs4_slot_table *tbl; + struct nfs4_slot *slot; struct nfs_client *clp; int i; __be32 status = htonl(NFS4ERR_BADSESSION); @@ -429,25 +428,32 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, if (!(clp->cl_session->flags & SESSION4_BACK_CHAN)) goto out; + tbl = &clp->cl_session->bc_slot_table; + slot = tbl->slots + args->csa_slotid; spin_lock(&tbl->slot_tbl_lock); /* state manager is resetting the session */ if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { - spin_unlock(&tbl->slot_tbl_lock); status = htonl(NFS4ERR_DELAY); /* Return NFS4ERR_BADSESSION if we're draining the session * in order to reset it. */ if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) status = htonl(NFS4ERR_BADSESSION); - goto out; + goto out_unlock; } - status = validate_seqid(&clp->cl_session->bc_slot_table, args); - spin_unlock(&tbl->slot_tbl_lock); + memcpy(&res->csr_sessionid, &args->csa_sessionid, + sizeof(res->csr_sessionid)); + res->csr_sequenceid = args->csa_sequenceid; + res->csr_slotid = args->csa_slotid; + res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + + status = validate_seqid(tbl, args); if (status) - goto out; + goto out_unlock; cps->slotid = args->csa_slotid; @@ -458,15 +464,17 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, */ if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) { status = htonl(NFS4ERR_DELAY); - goto out; + goto out_unlock; } - memcpy(&res->csr_sessionid, &args->csa_sessionid, - sizeof(res->csr_sessionid)); - res->csr_sequenceid = args->csa_sequenceid; - res->csr_slotid = args->csa_slotid; - res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; - res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + /* + * RFC5661 20.9.3 + * If CB_SEQUENCE returns an error, then the state of the slot + * (sequence ID, cached reply) MUST NOT change. + */ + slot->seq_nr++; +out_unlock: + spin_unlock(&tbl->slot_tbl_lock); out: cps->clp = clp; /* put in nfs4_callback_compound */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 19ca95cdfd9b..6b1697a01dde 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -909,7 +909,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r xdr_init_encode(&xdr_out, &rqstp->rq_res, p); status = decode_compound_hdr_arg(&xdr_in, &hdr_arg); - if (status == __constant_htonl(NFS4ERR_RESOURCE)) + if (status == htonl(NFS4ERR_RESOURCE)) return rpc_garbage_args; if (hdr_arg.minorversion == 0) { diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 892aefff3630..ecebb406cc1a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -825,7 +825,6 @@ error: * Load up the server record from information gained in an fsinfo record */ static void nfs_server_set_fsinfo(struct nfs_server *server, - struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo) { unsigned long max_rpc_payload; @@ -901,7 +900,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs if (error < 0) goto out_error; - nfs_server_set_fsinfo(server, mntfh, &fsinfo); + nfs_server_set_fsinfo(server, &fsinfo); /* Get some general file system info */ if (server->namelen == 0) { @@ -1193,8 +1192,6 @@ void nfs_clients_init(struct net *net) } #ifdef CONFIG_PROC_FS -static struct proc_dir_entry *proc_fs_nfs; - static int nfs_server_list_open(struct inode *inode, struct file *file); static void *nfs_server_list_start(struct seq_file *p, loff_t *pos); static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos); @@ -1364,27 +1361,29 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) { struct nfs_server *server; struct nfs_client *clp; - char dev[8], fsid[17]; + char dev[13]; // 8 for 2^24, 1 for ':', 3 for 2^8, 1 for '\0' + char fsid[34]; // 2 * 16 for %llx, 1 for ':', 1 for '\0' struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* display header on line 1 */ if (v == &nn->nfs_volume_list) { - seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); + seq_puts(m, "NV SERVER PORT DEV FSID" + " FSC\n"); return 0; } /* display one transport per line on subsequent lines */ server = list_entry(v, struct nfs_server, master_link); clp = server->nfs_client; - snprintf(dev, 8, "%u:%u", + snprintf(dev, sizeof(dev), "%u:%u", MAJOR(server->s_dev), MINOR(server->s_dev)); - snprintf(fsid, 17, "%llx:%llx", + snprintf(fsid, sizeof(fsid), "%llx:%llx", (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); rcu_read_lock(); - seq_printf(m, "v%u %s %s %-7s %-17s %s\n", + seq_printf(m, "v%u %s %s %-12s %-33s %s\n", clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), @@ -1434,27 +1433,20 @@ void nfs_fs_proc_net_exit(struct net *net) */ int __init nfs_fs_proc_init(void) { - struct proc_dir_entry *p; - - proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL); - if (!proc_fs_nfs) + if (!proc_mkdir("fs/nfsfs", NULL)) goto error_0; /* a file of servers with which we're dealing */ - p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers"); - if (!p) + if (!proc_symlink("fs/nfsfs/servers", NULL, "../../net/nfsfs/servers")) goto error_1; /* a file of volumes that we have mounted */ - p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes"); - if (!p) - goto error_2; - return 0; + if (!proc_symlink("fs/nfsfs/volumes", NULL, "../../net/nfsfs/volumes")) + goto error_1; -error_2: - remove_proc_entry("servers", proc_fs_nfs); + return 0; error_1: - remove_proc_entry("fs/nfsfs", NULL); + remove_proc_subtree("fs/nfsfs", NULL); error_0: return -ENOMEM; } @@ -1464,9 +1456,7 @@ error_0: */ void nfs_fs_proc_exit(void) { - remove_proc_entry("volumes", proc_fs_nfs); - remove_proc_entry("servers", proc_fs_nfs); - remove_proc_entry("fs/nfsfs", NULL); + remove_proc_subtree("fs/nfsfs", NULL); } #endif /* CONFIG_PROC_FS */ diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b2c8b31b2be7..21457bb0edd6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1470,9 +1470,6 @@ static int nfs_finish_open(struct nfs_open_context *ctx, { int err; - if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) - *opened |= FILE_CREATED; - err = finish_open(file, dentry, do_open, opened); if (err) goto out; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8b8d83a526ce..cc4fa1ed61fc 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -555,31 +555,22 @@ static int nfs_launder_page(struct page *page) return nfs_wb_page(inode, page); } -#ifdef CONFIG_NFS_SWAP static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { - int ret; struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); *span = sis->pages; - rcu_read_lock(); - ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1); - rcu_read_unlock(); - - return ret; + return rpc_clnt_swap_activate(clnt); } static void nfs_swap_deactivate(struct file *file) { struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); - rcu_read_lock(); - xs_swapper(rcu_dereference(clnt->cl_xprt), 0); - rcu_read_unlock(); + rpc_clnt_swap_deactivate(clnt); } -#endif const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, @@ -596,10 +587,8 @@ const struct address_space_operations nfs_file_aops = { .launder_page = nfs_launder_page, .is_dirty_writeback = nfs_check_dirty_writeback, .error_remove_page = generic_error_remove_page, -#ifdef CONFIG_NFS_SWAP .swap_activate = nfs_swap_activate, .swap_deactivate = nfs_swap_deactivate, -#endif }; /* diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 7d05089e52d6..c12951b9551e 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -20,6 +20,7 @@ #include "../nfs4trace.h" #include "../iostat.h" #include "../nfs.h" +#include "../nfs42.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -182,17 +183,14 @@ static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls) static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) { - struct nfs4_ff_layout_mirror *tmp; int i, j; for (i = 0; i < fls->mirror_array_cnt - 1; i++) { for (j = i + 1; j < fls->mirror_array_cnt; j++) if (fls->mirror_array[i]->efficiency < - fls->mirror_array[j]->efficiency) { - tmp = fls->mirror_array[i]; - fls->mirror_array[i] = fls->mirror_array[j]; - fls->mirror_array[j] = tmp; - } + fls->mirror_array[j]->efficiency) + swap(fls->mirror_array[i], + fls->mirror_array[j]); } } @@ -274,6 +272,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, spin_lock_init(&fls->mirror_array[i]->lock); fls->mirror_array[i]->ds_count = ds_count; + fls->mirror_array[i]->lseg = &fls->generic_hdr; /* deviceid */ rc = decode_deviceid(&stream, &devid); @@ -344,6 +343,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, fls->mirror_array[i]->gid); } + p = xdr_inline_decode(&stream, 4); + if (p) + fls->flags = be32_to_cpup(p); + ff_layout_sort_mirrors(fls); rc = ff_layout_check_layout(lgr); if (rc) @@ -415,6 +418,146 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls) return 1; } +static void +nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer) +{ + /* first IO request? */ + if (atomic_inc_return(&timer->n_ops) == 1) { + timer->start_time = ktime_get(); + } +} + +static ktime_t +nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer) +{ + ktime_t start, now; + + if (atomic_dec_return(&timer->n_ops) < 0) + WARN_ON_ONCE(1); + + now = ktime_get(); + start = timer->start_time; + timer->start_time = now; + return ktime_sub(now, start); +} + +static ktime_t +nfs4_ff_layout_calc_completion_time(struct rpc_task *task) +{ + return ktime_sub(ktime_get(), task->tk_start); +} + +static bool +nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, + struct nfs4_ff_layoutstat *layoutstat) +{ + static const ktime_t notime = {0}; + ktime_t now = ktime_get(); + + nfs4_ff_start_busy_timer(&layoutstat->busy_timer); + if (ktime_equal(mirror->start_time, notime)) + mirror->start_time = now; + if (ktime_equal(mirror->last_report_time, notime)) + mirror->last_report_time = now; + if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= + FF_LAYOUTSTATS_REPORT_INTERVAL) { + mirror->last_report_time = now; + return true; + } + + return false; +} + +static void +nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat, + __u64 requested) +{ + struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat; + + iostat->ops_requested++; + iostat->bytes_requested += requested; +} + +static void +nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat, + __u64 requested, + __u64 completed, + ktime_t time_completed) +{ + struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat; + ktime_t timer; + + iostat->ops_completed++; + iostat->bytes_completed += completed; + iostat->bytes_not_delivered += requested - completed; + + timer = nfs4_ff_end_busy_timer(&layoutstat->busy_timer); + iostat->total_busy_time = + ktime_add(iostat->total_busy_time, timer); + iostat->aggregate_completion_time = + ktime_add(iostat->aggregate_completion_time, time_completed); +} + +static void +nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror, + __u64 requested) +{ + bool report; + + spin_lock(&mirror->lock); + report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat); + nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); + spin_unlock(&mirror->lock); + + if (report) + pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode); +} + +static void +nfs4_ff_layout_stat_io_end_read(struct rpc_task *task, + struct nfs4_ff_layout_mirror *mirror, + __u64 requested, + __u64 completed) +{ + spin_lock(&mirror->lock); + nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat, + requested, completed, + nfs4_ff_layout_calc_completion_time(task)); + spin_unlock(&mirror->lock); +} + +static void +nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror, + __u64 requested) +{ + bool report; + + spin_lock(&mirror->lock); + report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat); + nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); + spin_unlock(&mirror->lock); + + if (report) + pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode); +} + +static void +nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, + struct nfs4_ff_layout_mirror *mirror, + __u64 requested, + __u64 completed, + enum nfs3_stable_how committed) +{ + if (committed == NFS_UNSTABLE) + requested = completed = 0; + + spin_lock(&mirror->lock); + nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat, + requested, completed, + nfs4_ff_layout_calc_completion_time(task)); + spin_unlock(&mirror->lock); +} + static int ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, @@ -631,7 +774,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) nfs_direct_set_resched_writes(hdr->dreq); /* fake unstable write to let common nfs resend pages */ hdr->verf.committed = NFS_UNSTABLE; - hdr->good_bytes = 0; + hdr->good_bytes = hdr->args.count; } return; } @@ -879,6 +1022,12 @@ static int ff_layout_read_done_cb(struct rpc_task *task, return 0; } +static bool +ff_layout_need_layoutcommit(struct pnfs_layout_segment *lseg) +{ + return !(FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_LAYOUTCOMMIT); +} + /* * We reference the rpc_cred of the first WRITE that triggers the need for * a LAYOUTCOMMIT, and use it to send the layoutcommit compound. @@ -891,6 +1040,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task, static void ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr) { + if (!ff_layout_need_layoutcommit(hdr->lseg)) + return; + pnfs_set_layoutcommit(hdr->inode, hdr->lseg, hdr->mds_offset + hdr->res.count); dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, @@ -909,6 +1061,10 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx) static int ff_layout_read_prepare_common(struct rpc_task *task, struct nfs_pgio_header *hdr) { + nfs4_ff_layout_stat_io_start_read( + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -962,15 +1118,15 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_read_prepare_common(task, hdr)) - return; - if (ff_layout_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) return; + if (ff_layout_read_prepare_common(task, hdr)) + return; + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, hdr->args.lock_context, FMODE_READ) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ @@ -982,6 +1138,10 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data) dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); + nfs4_ff_layout_stat_io_end_read(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count); + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1074,7 +1234,8 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, return -EAGAIN; } - if (data->verf.committed == NFS_UNSTABLE) + if (data->verf.committed == NFS_UNSTABLE + && ff_layout_need_layoutcommit(data->lseg)) pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb); return 0; @@ -1083,6 +1244,10 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, static int ff_layout_write_prepare_common(struct rpc_task *task, struct nfs_pgio_header *hdr) { + nfs4_ff_layout_stat_io_start_write( + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -1116,15 +1281,15 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_write_prepare_common(task, hdr)) - return; - if (ff_layout_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) return; + if (ff_layout_write_prepare_common(task, hdr)) + return; + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, hdr->args.lock_context, FMODE_WRITE) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ @@ -1134,6 +1299,11 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count, + hdr->res.verf->committed); + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1152,8 +1322,17 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data) &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); } +static void ff_layout_commit_prepare_common(struct rpc_task *task, + struct nfs_commit_data *cdata) +{ + nfs4_ff_layout_stat_io_start_write( + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + 0); +} + static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) { + ff_layout_commit_prepare_common(task, data); rpc_call_start(task); } @@ -1161,10 +1340,30 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data) { struct nfs_commit_data *wdata = data; - ff_layout_setup_sequence(wdata->ds_clp, + if (ff_layout_setup_sequence(wdata->ds_clp, &wdata->args.seq_args, &wdata->res.seq_res, - task); + task)) + return; + ff_layout_commit_prepare_common(task, data); +} + +static void ff_layout_commit_done(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *cdata = data; + struct nfs_page *req; + __u64 count = 0; + + if (task->tk_status == 0) { + list_for_each_entry(req, &cdata->pages, wb_list) + count += req->wb_bytes; + } + + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + count, count, NFS_FILE_SYNC); + + pnfs_generic_write_commit_done(task, data); } static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) @@ -1205,14 +1404,14 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = { .rpc_call_prepare = ff_layout_commit_prepare_v3, - .rpc_call_done = pnfs_generic_write_commit_done, + .rpc_call_done = ff_layout_commit_done, .rpc_count_stats = ff_layout_commit_count_stats, .rpc_release = pnfs_generic_commit_release, }; static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = { .rpc_call_prepare = ff_layout_commit_prepare_v4, - .rpc_call_done = pnfs_generic_write_commit_done, + .rpc_call_done = ff_layout_commit_done, .rpc_count_stats = ff_layout_commit_count_stats, .rpc_release = pnfs_generic_commit_release, }; @@ -1256,7 +1455,6 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) fh = nfs4_ff_layout_select_ds_fh(lseg, idx); if (fh) hdr->args.fh = fh; - /* * Note that if we ever decide to split across DSes, * then we may need to handle dense-like offsets. @@ -1385,6 +1583,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) data->args.fh = fh; + return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops, vers == 3 ? &ff_layout_commit_call_ops_v3 : &ff_layout_commit_call_ops_v4, @@ -1488,6 +1687,247 @@ out: dprintk("%s: Return\n", __func__); } +static int +ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + + return snprintf(buf, buflen, "%pI4", &sin->sin_addr); +} + +static size_t +ff_layout_ntop6_noscopeid(const struct sockaddr *sap, char *buf, + const int buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + const struct in6_addr *addr = &sin6->sin6_addr; + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded ANY address + */ + if (ipv6_addr_any(addr)) + return snprintf(buf, buflen, "::"); + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded loopback address + */ + if (ipv6_addr_loopback(addr)) + return snprintf(buf, buflen, "::1"); + + /* + * RFC 4291, Section 2.2.3 + * + * Special presentation address format for mapped v4 + * addresses. + */ + if (ipv6_addr_v4mapped(addr)) + return snprintf(buf, buflen, "::ffff:%pI4", + &addr->s6_addr32[3]); + + /* + * RFC 4291, Section 2.2.1 + */ + return snprintf(buf, buflen, "%pI6c", addr); +} + +/* Derived from rpc_sockaddr2uaddr */ +static void +ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da) +{ + struct sockaddr *sap = (struct sockaddr *)&da->da_addr; + char portbuf[RPCBIND_MAXUADDRPLEN]; + char addrbuf[RPCBIND_MAXUADDRLEN]; + char *netid; + unsigned short port; + int len, netid_len; + __be32 *p; + + switch (sap->sa_family) { + case AF_INET: + if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) + return; + port = ntohs(((struct sockaddr_in *)sap)->sin_port); + netid = "tcp"; + netid_len = 3; + break; + case AF_INET6: + if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) + return; + port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + netid = "tcp6"; + netid_len = 4; + break; + default: + /* we only support tcp and tcp6 */ + WARN_ON_ONCE(1); + return; + } + + snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff); + len = strlcat(addrbuf, portbuf, sizeof(addrbuf)); + + p = xdr_reserve_space(xdr, 4 + netid_len); + xdr_encode_opaque(p, netid, netid_len); + + p = xdr_reserve_space(xdr, 4 + len); + xdr_encode_opaque(p, addrbuf, len); +} + +static void +ff_layout_encode_nfstime(struct xdr_stream *xdr, + ktime_t t) +{ + struct timespec64 ts; + __be32 *p; + + p = xdr_reserve_space(xdr, 12); + ts = ktime_to_timespec64(t); + p = xdr_encode_hyper(p, ts.tv_sec); + *p++ = cpu_to_be32(ts.tv_nsec); +} + +static void +ff_layout_encode_io_latency(struct xdr_stream *xdr, + struct nfs4_ff_io_stat *stat) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 5 * 8); + p = xdr_encode_hyper(p, stat->ops_requested); + p = xdr_encode_hyper(p, stat->bytes_requested); + p = xdr_encode_hyper(p, stat->ops_completed); + p = xdr_encode_hyper(p, stat->bytes_completed); + p = xdr_encode_hyper(p, stat->bytes_not_delivered); + ff_layout_encode_nfstime(xdr, stat->total_busy_time); + ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time); +} + +static void +ff_layout_encode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args, + struct nfs42_layoutstat_devinfo *devinfo) +{ + struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private; + struct nfs4_pnfs_ds_addr *da; + struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds; + struct nfs_fh *fh = &mirror->fh_versions[0]; + __be32 *p, *start; + + da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); + dprintk("%s: DS %s: encoding address %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); + /* layoutupdate length */ + start = xdr_reserve_space(xdr, 4); + /* netaddr4 */ + ff_layout_encode_netaddr(xdr, da); + /* nfs_fh4 */ + p = xdr_reserve_space(xdr, 4 + fh->size); + xdr_encode_opaque(p, fh->data, fh->size); + /* ff_io_latency4 read */ + spin_lock(&mirror->lock); + ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat); + /* ff_io_latency4 write */ + ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat); + spin_unlock(&mirror->lock); + /* nfstime4 */ + ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time)); + /* bool */ + p = xdr_reserve_space(xdr, 4); + *p = cpu_to_be32(false); + + *start = cpu_to_be32((xdr->p - start - 1) * 4); +} + +static bool +ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, + struct pnfs_layout_segment *pls, + int *dev_count, int dev_limit) +{ + struct nfs4_ff_layout_mirror *mirror; + struct nfs4_deviceid_node *dev; + struct nfs42_layoutstat_devinfo *devinfo; + int i; + + for (i = 0; i <= FF_LAYOUT_MIRROR_COUNT(pls); i++) { + if (*dev_count >= dev_limit) + break; + mirror = FF_LAYOUT_COMP(pls, i); + if (!mirror || !mirror->mirror_ds) + continue; + dev = FF_LAYOUT_DEVID_NODE(pls, i); + devinfo = &args->devinfo[*dev_count]; + memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); + devinfo->offset = pls->pls_range.offset; + devinfo->length = pls->pls_range.length; + /* well, we don't really know if IO is continuous or not! */ + devinfo->read_count = mirror->read_stat.io_stat.bytes_completed; + devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed; + devinfo->write_count = mirror->write_stat.io_stat.bytes_completed; + devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; + devinfo->layout_type = LAYOUT_FLEX_FILES; + devinfo->layoutstats_encode = ff_layout_encode_layoutstats; + devinfo->layout_private = mirror; + /* lseg refcount put in cleanup_layoutstats */ + pnfs_get_lseg(pls); + + ++(*dev_count); + } + + return *dev_count < dev_limit; +} + +static int +ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) +{ + struct pnfs_layout_segment *pls; + int dev_count = 0; + + spin_lock(&args->inode->i_lock); + list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) { + dev_count += FF_LAYOUT_MIRROR_COUNT(pls); + } + spin_unlock(&args->inode->i_lock); + /* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */ + if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) { + dprintk("%s: truncating devinfo to limit (%d:%d)\n", + __func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV); + dev_count = PNFS_LAYOUTSTATS_MAXDEV; + } + args->devinfo = kmalloc(dev_count * sizeof(*args->devinfo), GFP_KERNEL); + if (!args->devinfo) + return -ENOMEM; + + dev_count = 0; + spin_lock(&args->inode->i_lock); + list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) { + if (!ff_layout_mirror_prepare_stats(args, pls, &dev_count, + PNFS_LAYOUTSTATS_MAXDEV)) { + break; + } + } + spin_unlock(&args->inode->i_lock); + args->num_dev = dev_count; + + return 0; +} + +static void +ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data) +{ + struct nfs4_ff_layout_mirror *mirror; + int i; + + for (i = 0; i < data->args.num_dev; i++) { + mirror = data->args.devinfo[i].layout_private; + data->args.devinfo[i].layout_private = NULL; + pnfs_put_lseg(mirror->lseg); + } +} + static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", @@ -1510,6 +1950,8 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .alloc_deviceid_node = ff_layout_alloc_deviceid_node, .encode_layoutreturn = ff_layout_encode_layoutreturn, .sync = pnfs_nfs_generic_sync, + .prepare_layoutstats = ff_layout_prepare_layoutstats, + .cleanup_layoutstats = ff_layout_cleanup_layoutstats, }; static int __init nfs4flexfilelayout_init(void) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 070f20445b2d..f92f9a0a856b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -9,12 +9,17 @@ #ifndef FS_NFS_NFS4FLEXFILELAYOUT_H #define FS_NFS_NFS4FLEXFILELAYOUT_H +#define FF_FLAGS_NO_LAYOUTCOMMIT 1 + #include "../pnfs.h" /* XXX: Let's filter out insanely large mirror count for now to avoid oom * due to network error etc. */ #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 +/* LAYOUTSTATS report interval in ms */ +#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L) + struct nfs4_ff_ds_version { u32 version; u32 minor_version; @@ -41,24 +46,48 @@ struct nfs4_ff_layout_ds_err { struct nfs4_deviceid deviceid; }; +struct nfs4_ff_io_stat { + __u64 ops_requested; + __u64 bytes_requested; + __u64 ops_completed; + __u64 bytes_completed; + __u64 bytes_not_delivered; + ktime_t total_busy_time; + ktime_t aggregate_completion_time; +}; + +struct nfs4_ff_busy_timer { + ktime_t start_time; + atomic_t n_ops; +}; + +struct nfs4_ff_layoutstat { + struct nfs4_ff_io_stat io_stat; + struct nfs4_ff_busy_timer busy_timer; +}; + struct nfs4_ff_layout_mirror { + struct pnfs_layout_segment *lseg; /* back pointer */ u32 ds_count; u32 efficiency; struct nfs4_ff_layout_ds *mirror_ds; u32 fh_versions_cnt; struct nfs_fh *fh_versions; nfs4_stateid stateid; - struct nfs4_string user_name; - struct nfs4_string group_name; u32 uid; u32 gid; struct rpc_cred *cred; spinlock_t lock; + struct nfs4_ff_layoutstat read_stat; + struct nfs4_ff_layoutstat write_stat; + ktime_t start_time; + ktime_t last_report_time; }; struct nfs4_ff_layout_segment { struct pnfs_layout_segment generic_hdr; u64 stripe_unit; + u32 flags; u32 mirror_array_cnt; struct nfs4_ff_layout_mirror **mirror_array; }; diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 77a2d026aa12..f13e1969eedd 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -324,7 +324,8 @@ static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror, __func__, PTR_ERR(cred)); return PTR_ERR(cred); } else { - mirror->cred = cred; + if (cmpxchg(&mirror->cred, NULL, cred)) + put_rpccred(cred); } } return 0; @@ -386,7 +387,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ smp_rmb(); if (ds->ds_clp) - goto out; + goto out_update_creds; flavor = nfs4_ff_layout_choose_authflavor(mirror); @@ -430,7 +431,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, } } } - +out_update_creds: if (ff_layout_update_mirror_cred(mirror, ds)) ds = NULL; out: diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f734562c6d24..b77b328a06d7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -678,6 +678,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) if (!err) { generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); + if (S_ISDIR(inode->i_mode)) + stat->blksize = NFS_SERVER(inode)->dtsize; } out: trace_nfs_getattr_exit(inode, err); @@ -2008,17 +2010,15 @@ static int __init init_nfs_fs(void) if (err) goto out1; -#ifdef CONFIG_PROC_FS rpc_proc_register(&init_net, &nfs_rpcstat); -#endif - if ((err = register_nfs_fs()) != 0) + + err = register_nfs_fs(); + if (err) goto out0; return 0; out0: -#ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); -#endif nfs_destroy_directcache(); out1: nfs_destroy_writepagecache(); @@ -2049,9 +2049,7 @@ static void __exit exit_nfs_fs(void) nfs_destroy_nfspagecache(); nfs_fscache_unregister(); unregister_pernet_subsys(&nfs_net_ops); -#ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); -#endif unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 53852a4bd88b..9b04c2e6fffc 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1342,7 +1342,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, if (args->npages != 0) xdr_write_pages(xdr, args->pages, 0, args->len); else - xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE); + xdr_reserve_space(xdr, args->len); error = nfsacl_encode(xdr->buf, base, args->inode, (args->mask & NFS_ACL) ? diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 7afb8947dfdf..ff66ae700b89 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -5,11 +5,18 @@ #ifndef __LINUX_FS_NFS_NFS4_2_H #define __LINUX_FS_NFS_NFS4_2_H +/* + * FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support + * more? Need to consider not to pre-alloc too much for a compound. + */ +#define PNFS_LAYOUTSTATS_MAXDEV (4) + /* nfs4.2proc.c */ int nfs42_proc_allocate(struct file *, loff_t, loff_t); int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); - +int nfs42_proc_layoutstats_generic(struct nfs_server *, + struct nfs42_layoutstat_data *); /* nfs4.2xdr.h */ extern struct rpc_procinfo nfs4_2_procedures[]; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 3a9e75235f30..f486b80f927a 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -10,6 +10,11 @@ #include <linux/nfs_fs.h> #include "nfs4_fs.h" #include "nfs42.h" +#include "iostat.h" +#include "pnfs.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, fmode_t fmode) @@ -165,3 +170,85 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); } + +static void +nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + struct nfs_server *server = NFS_SERVER(data->args.inode); + + nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args, + &data->res.seq_res, task); +} + +static void +nfs42_layoutstat_done(struct rpc_task *task, void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + + if (!nfs4_sequence_done(task, &data->res.seq_res)) + return; + + switch (task->tk_status) { + case 0: + break; + case -ENOTSUPP: + case -EOPNOTSUPP: + NFS_SERVER(data->inode)->caps &= ~NFS_CAP_LAYOUTSTATS; + default: + dprintk("%s server returns %d\n", __func__, task->tk_status); + } +} + +static void +nfs42_layoutstat_release(void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + struct nfs_server *nfss = NFS_SERVER(data->args.inode); + + if (nfss->pnfs_curr_ld->cleanup_layoutstats) + nfss->pnfs_curr_ld->cleanup_layoutstats(data); + + pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); + smp_mb__before_atomic(); + clear_bit(NFS_INO_LAYOUTSTATS, &NFS_I(data->args.inode)->flags); + smp_mb__after_atomic(); + nfs_iput_and_deactive(data->inode); + kfree(data->args.devinfo); + kfree(data); +} + +static const struct rpc_call_ops nfs42_layoutstat_ops = { + .rpc_call_prepare = nfs42_layoutstat_prepare, + .rpc_call_done = nfs42_layoutstat_done, + .rpc_release = nfs42_layoutstat_release, +}; + +int nfs42_proc_layoutstats_generic(struct nfs_server *server, + struct nfs42_layoutstat_data *data) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTSTATS], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + }; + struct rpc_task_setup task_setup = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs42_layoutstat_ops, + .callback_data = data, + .flags = RPC_TASK_ASYNC, + }; + struct rpc_task *task; + + data->inode = nfs_igrab_and_active(data->args.inode); + if (!data->inode) { + nfs42_layoutstat_release(data); + return -EAGAIN; + } + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + task = rpc_run_task(&task_setup); + if (IS_ERR(task)) + return PTR_ERR(task); + return 0; +} diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 1a25b27248f2..a6bd27da6286 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -4,6 +4,8 @@ #ifndef __LINUX_FS_NFS_NFS4_2XDR_H #define __LINUX_FS_NFS_NFS4_2XDR_H +#include "nfs42.h" + #define encode_fallocate_maxsz (encode_stateid_maxsz + \ 2 /* offset */ + \ 2 /* length */) @@ -22,6 +24,16 @@ 1 /* whence */ + \ 2 /* offset */ + \ 2 /* length */) +#define encode_io_info_maxsz 4 +#define encode_layoutstats_maxsz (op_decode_hdr_maxsz + \ + 2 /* offset */ + \ + 2 /* length */ + \ + encode_stateid_maxsz + \ + encode_io_info_maxsz + \ + encode_io_info_maxsz + \ + 1 /* opaque devaddr4 length */ + \ + XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) +#define decode_layoutstats_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -45,6 +57,14 @@ #define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_seek_maxsz) +#define NFS4_enc_layoutstats_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + PNFS_LAYOUTSTATS_MAXDEV * encode_layoutstats_maxsz) +#define NFS4_dec_layoutstats_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) static void encode_fallocate(struct xdr_stream *xdr, @@ -81,6 +101,33 @@ static void encode_seek(struct xdr_stream *xdr, encode_uint32(xdr, args->sa_what); } +static void encode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args, + struct nfs42_layoutstat_devinfo *devinfo, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_LAYOUTSTATS, decode_layoutstats_maxsz, hdr); + p = reserve_space(xdr, 8 + 8); + p = xdr_encode_hyper(p, devinfo->offset); + p = xdr_encode_hyper(p, devinfo->length); + encode_nfs4_stateid(xdr, &args->stateid); + p = reserve_space(xdr, 4*8 + NFS4_DEVICEID4_SIZE + 4); + p = xdr_encode_hyper(p, devinfo->read_count); + p = xdr_encode_hyper(p, devinfo->read_bytes); + p = xdr_encode_hyper(p, devinfo->write_count); + p = xdr_encode_hyper(p, devinfo->write_bytes); + p = xdr_encode_opaque_fixed(p, devinfo->dev_id.data, + NFS4_DEVICEID4_SIZE); + /* Encode layoutupdate4 */ + *p++ = cpu_to_be32(devinfo->layout_type); + if (devinfo->layoutstats_encode != NULL) + devinfo->layoutstats_encode(xdr, args, devinfo); + else + encode_uint32(xdr, 0); +} + /* * Encode ALLOCATE request */ @@ -137,6 +184,28 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode LAYOUTSTATS request + */ +static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args) +{ + int i; + + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + WARN_ON(args->num_dev > PNFS_LAYOUTSTATS_MAXDEV); + for (i = 0; i < args->num_dev; i++) + encode_layoutstats(xdr, args, &args->devinfo[i], &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -169,6 +238,12 @@ out_overflow: return -EIO; } +static int decode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_res *res) +{ + return decode_op_hdr(xdr, OP_LAYOUTSTATS); +} + /* * Decode ALLOCATE request */ @@ -246,4 +321,35 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode LAYOUTSTATS request + */ +static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_layoutstat_res *res) +{ + struct compound_hdr hdr; + int status, i; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + WARN_ON(res->num_dev > PNFS_LAYOUTSTATS_MAXDEV); + for (i = 0; i < res->num_dev; i++) { + status = decode_layoutstats(xdr, res); + if (status) + goto out; + } +out: + res->rpc_status = status; + return status; +} + #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index fdef424b0cd3..ea3bee919a76 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -233,6 +233,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); +extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index e42be52a8c18..3aa6a9ba5113 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -676,7 +676,6 @@ found: break; } - /* No matching nfs_client found. */ spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s status = %d\n", __func__, status); nfs_put_client(prev); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index f58c17b3b480..dcd39d4e2efe 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -41,6 +41,10 @@ nfs4_file_open(struct inode *inode, struct file *filp) dprintk("NFS: open file(%pd2)\n", dentry); + err = nfs_check_flags(openflags); + if (err) + return err; + if ((openflags & O_ACCMODE) == 3) openflags--; diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c index c0b3a16b4a00..039b3eb6d834 100644 --- a/fs/nfs/nfs4getroot.c +++ b/fs/nfs/nfs4getroot.c @@ -35,13 +35,6 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p goto out; } - if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { - printk(KERN_ERR "nfs4_get_rootfh:" - " getroot obtained referral\n"); - ret = -EREMOTE; - goto out; - } - memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); out: nfs_free_fattr(fsinfo.fattr); diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 2e1737c40a29..535dfc69c628 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -494,12 +494,7 @@ nfs_idmap_delete(struct nfs_client *clp) int nfs_idmap_init(void) { - int ret; - ret = nfs_idmap_init_keyring(); - if (ret != 0) - goto out; -out: - return ret; + return nfs_idmap_init_keyring(); } void nfs_idmap_quit(void) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 55e1e3af23a3..6f228b5af819 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -356,6 +356,9 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ case 0: return 0; case -NFS4ERR_OPENMODE: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: if (inode && nfs4_have_delegation(inode, FMODE_READ)) { nfs4_inode_return_delegation(inode); exception->retry = 1; @@ -367,15 +370,6 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ if (ret < 0) break; goto wait_on_recovery; - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - if (state == NULL) - break; - ret = nfs4_schedule_stateid_recovery(server, state); - if (ret < 0) - break; - goto wait_on_recovery; case -NFS4ERR_EXPIRED: if (state != NULL) { ret = nfs4_schedule_stateid_recovery(server, state); @@ -482,8 +476,8 @@ struct nfs4_call_sync_data { struct nfs4_sequence_res *seq_res; }; -static void nfs4_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) +void nfs4_init_sequence(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, int cache_reply) { args->sa_slot = NULL; args->sa_cache_this = cache_reply; @@ -1553,6 +1547,13 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod struct nfs4_state *newstate; int ret; + if ((opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR || + opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEG_CUR_FH) && + (opendata->o_arg.u.delegation_type & fmode) != fmode) + /* This mode can't have been delegated, so we must have + * a valid open_stateid to cover it - not need to reclaim. + */ + return 0; opendata->o_arg.open_flags = 0; opendata->o_arg.fmode = fmode; opendata->o_arg.share_access = nfs4_map_atomic_open_share( @@ -1684,6 +1685,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct "%d.\n", __func__, err); case 0: case -ENOENT: + case -EAGAIN: case -ESTALE: break; case -NFS4ERR_BADSESSION: @@ -3355,6 +3357,8 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, goto out; case -NFS4ERR_MOVED: err = nfs4_get_referral(client, dir, name, fattr, fhandle); + if (err == -NFS4ERR_MOVED) + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); goto out; case -NFS4ERR_WRONGSEC: err = -EPERM; @@ -4955,49 +4959,128 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, memcpy(bootverf->data, verf, sizeof(bootverf->data)); } -static unsigned int -nfs4_init_nonuniform_client_string(struct nfs_client *clp, - char *buf, size_t len) +static int +nfs4_init_nonuniform_client_string(struct nfs_client *clp) { - unsigned int result; + int result; + size_t len; + char *str; + bool retried = false; if (clp->cl_owner_id != NULL) - return strlcpy(buf, clp->cl_owner_id, len); + return 0; +retry: + rcu_read_lock(); + len = 10 + strlen(clp->cl_ipaddr) + 1 + + strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) + + 1 + + strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)) + + 1; + rcu_read_unlock(); + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; rcu_read_lock(); - result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s", - clp->cl_ipaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR), - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_PROTO)); + result = scnprintf(str, len, "Linux NFSv4.0 %s/%s %s", + clp->cl_ipaddr, + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)); rcu_read_unlock(); - clp->cl_owner_id = kstrdup(buf, GFP_KERNEL); - return result; + + /* Did something change? */ + if (result >= len) { + kfree(str); + if (retried) + return -EINVAL; + retried = true; + goto retry; + } + clp->cl_owner_id = str; + return 0; } -static unsigned int -nfs4_init_uniform_client_string(struct nfs_client *clp, - char *buf, size_t len) +static int +nfs4_init_uniquifier_client_string(struct nfs_client *clp) +{ + int result; + size_t len; + char *str; + + len = 10 + 10 + 1 + 10 + 1 + + strlen(nfs4_client_id_uniquifier) + 1 + + strlen(clp->cl_rpcclient->cl_nodename) + 1; + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; + + result = scnprintf(str, len, "Linux NFSv%u.%u %s/%s", + clp->rpc_ops->version, clp->cl_minorversion, + nfs4_client_id_uniquifier, + clp->cl_rpcclient->cl_nodename); + if (result >= len) { + kfree(str); + return -EINVAL; + } + clp->cl_owner_id = str; + return 0; +} + +static int +nfs4_init_uniform_client_string(struct nfs_client *clp) { - const char *nodename = clp->cl_rpcclient->cl_nodename; - unsigned int result; + int result; + size_t len; + char *str; if (clp->cl_owner_id != NULL) - return strlcpy(buf, clp->cl_owner_id, len); + return 0; if (nfs4_client_id_uniquifier[0] != '\0') - result = scnprintf(buf, len, "Linux NFSv%u.%u %s/%s", - clp->rpc_ops->version, - clp->cl_minorversion, - nfs4_client_id_uniquifier, - nodename); - else - result = scnprintf(buf, len, "Linux NFSv%u.%u %s", - clp->rpc_ops->version, clp->cl_minorversion, - nodename); - clp->cl_owner_id = kstrdup(buf, GFP_KERNEL); - return result; + return nfs4_init_uniquifier_client_string(clp); + + len = 10 + 10 + 1 + 10 + 1 + + strlen(clp->cl_rpcclient->cl_nodename) + 1; + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; + + result = scnprintf(str, len, "Linux NFSv%u.%u %s", + clp->rpc_ops->version, clp->cl_minorversion, + clp->cl_rpcclient->cl_nodename); + if (result >= len) { + kfree(str); + return -EINVAL; + } + clp->cl_owner_id = str; + return 0; } /* @@ -5044,7 +5127,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, struct nfs4_setclientid setclientid = { .sc_verifier = &sc_verifier, .sc_prog = program, - .sc_cb_ident = clp->cl_cb_ident, + .sc_clnt = clp, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], @@ -5064,16 +5147,15 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, /* nfs_client_id4 */ nfs4_init_boot_verifier(clp, &sc_verifier); + if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) - setclientid.sc_name_len = - nfs4_init_uniform_client_string(clp, - setclientid.sc_name, - sizeof(setclientid.sc_name)); + status = nfs4_init_uniform_client_string(clp); else - setclientid.sc_name_len = - nfs4_init_nonuniform_client_string(clp, - setclientid.sc_name, - sizeof(setclientid.sc_name)); + status = nfs4_init_nonuniform_client_string(clp); + + if (status) + goto out; + /* cb_client4 */ setclientid.sc_netid_len = nfs4_init_callback_netid(clp, @@ -5083,9 +5165,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); - dprintk("NFS call setclientid auth=%s, '%.*s'\n", + dprintk("NFS call setclientid auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, - setclientid.sc_name_len, setclientid.sc_name); + clp->cl_owner_id); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) { status = PTR_ERR(task); @@ -5402,6 +5484,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, atomic_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); + get_file(fl->fl_file); memcpy(&p->fl, fl, sizeof(p->fl)); p->server = NFS_SERVER(inode); return p; @@ -5413,6 +5496,7 @@ static void nfs4_locku_release_calldata(void *data) nfs_free_seqid(calldata->arg.seqid); nfs4_put_lock_state(calldata->lsp); put_nfs_open_context(calldata->ctx); + fput(calldata->fl.fl_file); kfree(calldata); } @@ -6846,11 +6930,14 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, }; nfs4_init_boot_verifier(clp, &verifier); - args.id_len = nfs4_init_uniform_client_string(clp, args.id, - sizeof(args.id)); - dprintk("NFS call exchange_id auth=%s, '%.*s'\n", + + status = nfs4_init_uniform_client_string(clp); + if (status) + goto out; + + dprintk("NFS call exchange_id auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, - args.id_len, args.id); + clp->cl_owner_id); res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), GFP_NOFS); @@ -6885,7 +6972,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, /* unsupported! */ WARN_ON_ONCE(1); status = -EINVAL; - goto out_server_scope; + goto out_impl_id; } status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); @@ -6913,6 +7000,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, /* use the most recent implementation id */ kfree(clp->cl_implid); clp->cl_implid = res.impl_id; + res.impl_id = NULL; if (clp->cl_serverscope != NULL && !nfs41_same_server_scope(clp->cl_serverscope, @@ -6926,15 +7014,16 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (clp->cl_serverscope == NULL) { clp->cl_serverscope = res.server_scope; - goto out; + res.server_scope = NULL; } - } else - kfree(res.impl_id); + } -out_server_owner: - kfree(res.server_owner); +out_impl_id: + kfree(res.impl_id); out_server_scope: kfree(res.server_scope); +out_server_owner: + kfree(res.server_owner); out: if (clp->cl_implid != NULL) dprintk("NFS reply exchange_id: Server Implementation ID: " @@ -8061,9 +8150,8 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) struct rpc_task *task; int status = 0; - dprintk("NFS: %4d initiating layoutcommit call. sync %d " - "lbw: %llu inode %lu\n", - data->task.tk_pid, sync, + dprintk("NFS: initiating layoutcommit call. sync %d " + "lbw: %llu inode %lu\n", sync, data->args.lastbytewritten, data->args.inode->i_ino); @@ -8557,7 +8645,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_ATOMIC_OPEN_V1 | NFS_CAP_ALLOCATE | NFS_CAP_DEALLOCATE - | NFS_CAP_SEEK, + | NFS_CAP_SEEK + | NFS_CAP_LAYOUTSTATS, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2782cfca2265..605840dc89cf 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -309,7 +309,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) goto do_confirm; - nfs4_begin_drain_session(clp); status = nfs4_proc_exchange_id(clp, cred); if (status != 0) goto out; @@ -1482,6 +1481,8 @@ restart: spin_unlock(&state->state_lock); } nfs4_put_open_state(state); + clear_bit(NFS4CLNT_RECLAIM_NOGRACE, + &state->flags); spin_lock(&sp->so_lock); goto restart; } @@ -1830,6 +1831,7 @@ static int nfs4_establish_lease(struct nfs_client *clp) clp->cl_mvops->reboot_recovery_ops; int status; + nfs4_begin_drain_session(clp); cred = nfs4_get_clid_cred(clp); if (cred == NULL) return -ENOENT; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0aea97841d30..558cd65dbdb7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -139,7 +139,8 @@ static int nfs4_stat_to_errno(int); #define encode_setclientid_maxsz \ (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ - XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \ + /* client name */ \ + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ 1 /* sc_prog */ + \ 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ @@ -288,7 +289,8 @@ static int nfs4_stat_to_errno(int); #define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \ encode_verifier_maxsz + \ 1 /* co_ownerid.len */ + \ - XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \ + /* eia_clientowner */ \ + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ 1 /* flags */ + \ 1 /* spa_how */ + \ /* max is SP4_MACH_CRED (for now) */ + \ @@ -1667,13 +1669,14 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr); encode_nfs4_verifier(xdr, setclientid->sc_verifier); - encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); + encode_string(xdr, strlen(setclientid->sc_clnt->cl_owner_id), + setclientid->sc_clnt->cl_owner_id); p = reserve_space(xdr, 4); *p = cpu_to_be32(setclientid->sc_prog); encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); p = reserve_space(xdr, 4); - *p = cpu_to_be32(setclientid->sc_cb_ident); + *p = cpu_to_be32(setclientid->sc_clnt->cl_cb_ident); } static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) @@ -1747,7 +1750,8 @@ static void encode_exchange_id(struct xdr_stream *xdr, encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); encode_nfs4_verifier(xdr, args->verifier); - encode_string(xdr, args->id_len, args->id); + encode_string(xdr, strlen(args->client->cl_owner_id), + args->client->cl_owner_id); encode_uint32(xdr, args->flags); encode_uint32(xdr, args->state_protect.how); @@ -7427,6 +7431,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(SEEK, enc_seek, dec_seek), PROC(ALLOCATE, enc_allocate, dec_allocate), PROC(DEALLOCATE, enc_deallocate, dec_deallocate), + PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 282b39369510..1da68d3b1eda 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -636,9 +636,8 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); - dprintk("NFS: %5u initiated pgio call " + dprintk("NFS: initiated pgio call " "(req %s/%llu, %u bytes @ offset %llu)\n", - hdr->task.tk_pid, hdr->inode->i_sb->s_id, (unsigned long long)NFS_FILEID(hdr->inode), hdr->args.count, @@ -690,8 +689,6 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, static void nfs_pgio_release(void *calldata) { struct nfs_pgio_header *hdr = calldata; - if (hdr->rw_ops->rw_release) - hdr->rw_ops->rw_release(hdr); nfs_pgio_data_destroy(hdr); hdr->completion_ops->completion(hdr); } @@ -711,7 +708,9 @@ static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, * nfs_pageio_init - initialise a page io descriptor * @desc: pointer to descriptor * @inode: pointer to inode - * @doio: pointer to io function + * @pg_ops: pointer to pageio operations + * @compl_ops: pointer to pageio completion operations + * @rw_ops: pointer to nfs read/write operations * @bsize: io block size * @io_flags: extra parameters for the io function */ @@ -1186,6 +1185,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an * nfs_pageio_descriptor * @desc: pointer to io descriptor + * @mirror_idx: pointer to mirror index */ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, u32 mirror_idx) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 230606243be6..0ba9a02c9566 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -35,6 +35,7 @@ #include "iostat.h" #include "nfs4trace.h" #include "delegation.h" +#include "nfs42.h" #define NFSDBG_FACILITY NFSDBG_PNFS #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) @@ -1821,6 +1822,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) /* Resend all requests through the MDS */ nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, hdr->completion_ops); + set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags); return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); @@ -1865,6 +1867,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); + hdr->release(hdr); } static enum pnfs_try_status @@ -1979,6 +1982,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); + hdr->release(hdr); } /* @@ -2247,3 +2251,63 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) } return thp; } + +#if IS_ENABLED(CONFIG_NFS_V4_2) +int +pnfs_report_layoutstat(struct inode *inode) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs42_layoutstat_data *data; + struct pnfs_layout_hdr *hdr; + int status = 0; + + if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) + goto out; + + if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS)) + goto out; + + if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags)) + goto out; + + spin_lock(&inode->i_lock); + if (!NFS_I(inode)->layout) { + spin_unlock(&inode->i_lock); + goto out; + } + hdr = NFS_I(inode)->layout; + pnfs_get_layout_hdr(hdr); + spin_unlock(&inode->i_lock); + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) { + status = -ENOMEM; + goto out_put; + } + + data->args.fh = NFS_FH(inode); + data->args.inode = inode; + nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid); + status = ld->prepare_layoutstats(&data->args); + if (status) + goto out_free; + + status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data); + +out: + dprintk("%s returns %d\n", __func__, status); + return status; + +out_free: + kfree(data); +out_put: + pnfs_put_layout_hdr(hdr); + smp_mb__before_atomic(); + clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags); + smp_mb__after_atomic(); + goto out; +} +EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); +#endif diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1e6308f82fc3..3e6ab7bfbabd 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -178,6 +178,8 @@ struct pnfs_layoutdriver_type { void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, const struct nfs4_layoutcommit_args *args); + int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args); + void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data); }; struct pnfs_layout_hdr { @@ -290,7 +292,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg); - /* nfs4_deviceid_flags */ enum { NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ @@ -689,4 +690,14 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void) #endif /* CONFIG_NFS_V4_1 */ +#if IS_ENABLED(CONFIG_NFS_V4_2) +int pnfs_report_layoutstat(struct inode *inode); +#else +static inline int +pnfs_report_layoutstat(struct inode *inode) +{ + return 0; +} +#endif + #endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e6c262555e08..65869ca9c851 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1290,6 +1290,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, static void nfs_redirty_request(struct nfs_page *req) { nfs_mark_request_dirty(req); + set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); nfs_unlock_request(req); nfs_end_page_writeback(req); nfs_release_request(req); @@ -1348,11 +1349,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -static void nfs_writeback_release_common(struct nfs_pgio_header *hdr) -{ - /* do nothing! */ -} - /* * Special version of should_remove_suid() that ignores capabilities. */ @@ -1556,7 +1552,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, /* Set up the initial task struct. */ nfs_ops->commit_setup(data, &msg); - dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); + dprintk("NFS: initiated commit call\n"); nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client, NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg); @@ -2013,7 +2009,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = { .rw_mode = FMODE_WRITE, .rw_alloc_header = nfs_writehdr_alloc, .rw_free_header = nfs_writehdr_free, - .rw_release = nfs_writeback_release_common, .rw_done = nfs_writeback_done, .rw_result = nfs_writeback_result, .rw_initiate = nfs_initiate_write, diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 450648697433..5b1e2a497e51 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -26,7 +26,7 @@ #include <linux/fs.h> /* struct inode */ #include <linux/fsnotify_backend.h> #include <linux/idr.h> -#include <linux/init.h> /* module_init */ +#include <linux/init.h> /* fs_initcall */ #include <linux/inotify.h> #include <linux/kernel.h> /* roundup() */ #include <linux/namei.h> /* LOOKUP_FOLLOW */ @@ -812,4 +812,4 @@ static int __init inotify_user_setup(void) return 0; } -module_init(inotify_user_setup); +fs_initcall(inotify_user_setup); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 907870e81a72..70e9af551600 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -23,6 +23,7 @@ struct ovl_cache_entry { u64 ino; struct list_head l_node; struct rb_node node; + struct ovl_cache_entry *next_maybe_whiteout; bool is_whiteout; char name[]; }; @@ -39,7 +40,7 @@ struct ovl_readdir_data { struct rb_root root; struct list_head *list; struct list_head middle; - struct dentry *dir; + struct ovl_cache_entry *first_maybe_whiteout; int count; int err; }; @@ -79,7 +80,7 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, return NULL; } -static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, +static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, const char *name, int len, u64 ino, unsigned int d_type) { @@ -98,29 +99,8 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, p->is_whiteout = false; if (d_type == DT_CHR) { - struct dentry *dentry; - const struct cred *old_cred; - struct cred *override_cred; - - override_cred = prepare_creds(); - if (!override_cred) { - kfree(p); - return NULL; - } - - /* - * CAP_DAC_OVERRIDE for lookup - */ - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - old_cred = override_creds(override_cred); - - dentry = lookup_one_len(name, dir, len); - if (!IS_ERR(dentry)) { - p->is_whiteout = ovl_is_whiteout(dentry); - dput(dentry); - } - revert_creds(old_cred); - put_cred(override_cred); + p->next_maybe_whiteout = rdd->first_maybe_whiteout; + rdd->first_maybe_whiteout = p; } return p; } @@ -148,7 +128,7 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, return 0; } - p = ovl_cache_entry_new(rdd->dir, name, len, ino, d_type); + p = ovl_cache_entry_new(rdd, name, len, ino, d_type); if (p == NULL) return -ENOMEM; @@ -169,7 +149,7 @@ static int ovl_fill_lower(struct ovl_readdir_data *rdd, if (p) { list_move_tail(&p->l_node, &rdd->middle); } else { - p = ovl_cache_entry_new(rdd->dir, name, namelen, ino, d_type); + p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type); if (p == NULL) rdd->err = -ENOMEM; else @@ -219,6 +199,43 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name, return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); } +static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) +{ + int err; + struct ovl_cache_entry *p; + struct dentry *dentry; + const struct cred *old_cred; + struct cred *override_cred; + + override_cred = prepare_creds(); + if (!override_cred) + return -ENOMEM; + + /* + * CAP_DAC_OVERRIDE for lookup + */ + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); + old_cred = override_creds(override_cred); + + err = mutex_lock_killable(&dir->d_inode->i_mutex); + if (!err) { + while (rdd->first_maybe_whiteout) { + p = rdd->first_maybe_whiteout; + rdd->first_maybe_whiteout = p->next_maybe_whiteout; + dentry = lookup_one_len(p->name, dir, p->len); + if (!IS_ERR(dentry)) { + p->is_whiteout = ovl_is_whiteout(dentry); + dput(dentry); + } + } + mutex_unlock(&dir->d_inode->i_mutex); + } + revert_creds(old_cred); + put_cred(override_cred); + + return err; +} + static inline int ovl_dir_read(struct path *realpath, struct ovl_readdir_data *rdd) { @@ -229,7 +246,7 @@ static inline int ovl_dir_read(struct path *realpath, if (IS_ERR(realfile)) return PTR_ERR(realfile); - rdd->dir = realpath->dentry; + rdd->first_maybe_whiteout = NULL; rdd->ctx.pos = 0; do { rdd->count = 0; @@ -238,6 +255,10 @@ static inline int ovl_dir_read(struct path *realpath, if (err >= 0) err = rdd->err; } while (!err && rdd->count); + + if (!err && rdd->first_maybe_whiteout) + err = ovl_check_whiteouts(realpath->dentry, rdd); + fput(realfile); return err; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index bf8537c7f455..8a08c582bc22 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -273,10 +273,57 @@ static void ovl_dentry_release(struct dentry *dentry) } } +static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct ovl_entry *oe = dentry->d_fsdata; + unsigned int i; + int ret = 1; + + for (i = 0; i < oe->numlower; i++) { + struct dentry *d = oe->lowerstack[i].dentry; + + if (d->d_flags & DCACHE_OP_REVALIDATE) { + ret = d->d_op->d_revalidate(d, flags); + if (ret < 0) + return ret; + if (!ret) { + if (!(flags & LOOKUP_RCU)) + d_invalidate(d); + return -ESTALE; + } + } + } + return 1; +} + +static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct ovl_entry *oe = dentry->d_fsdata; + unsigned int i; + int ret = 1; + + for (i = 0; i < oe->numlower; i++) { + struct dentry *d = oe->lowerstack[i].dentry; + + if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { + ret = d->d_op->d_weak_revalidate(d, flags); + if (ret <= 0) + break; + } + } + return ret; +} + static const struct dentry_operations ovl_dentry_operations = { .d_release = ovl_dentry_release, }; +static const struct dentry_operations ovl_reval_dentry_operations = { + .d_release = ovl_dentry_release, + .d_revalidate = ovl_dentry_revalidate, + .d_weak_revalidate = ovl_dentry_weak_revalidate, +}; + static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) { size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); @@ -288,6 +335,20 @@ static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) return oe; } +static bool ovl_dentry_remote(struct dentry *dentry) +{ + return dentry->d_flags & + (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); +} + +static bool ovl_dentry_weird(struct dentry *dentry) +{ + return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT | + DCACHE_MANAGE_TRANSIT | + DCACHE_OP_HASH | + DCACHE_OP_COMPARE); +} + static inline struct dentry *ovl_lookup_real(struct dentry *dir, struct qstr *name) { @@ -303,6 +364,10 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir, } else if (!dentry->d_inode) { dput(dentry); dentry = NULL; + } else if (ovl_dentry_weird(dentry)) { + dput(dentry); + /* Don't support traversing automounts and other weirdness */ + dentry = ERR_PTR(-EREMOTE); } return dentry; } @@ -350,6 +415,11 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, goto out; if (this) { + if (unlikely(ovl_dentry_remote(this))) { + dput(this); + err = -EREMOTE; + goto out; + } if (ovl_is_whiteout(this)) { dput(this); this = NULL; @@ -694,25 +764,6 @@ static void ovl_unescape(char *s) } } -static bool ovl_is_allowed_fs_type(struct dentry *root) -{ - const struct dentry_operations *dop = root->d_op; - - /* - * We don't support: - * - automount filesystems - * - filesystems with revalidate (FIXME for lower layer) - * - filesystems with case insensitive names - */ - if (dop && - (dop->d_manage || dop->d_automount || - dop->d_revalidate || dop->d_weak_revalidate || - dop->d_compare || dop->d_hash)) { - return false; - } - return true; -} - static int ovl_mount_dir_noesc(const char *name, struct path *path) { int err = -EINVAL; @@ -727,7 +778,7 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path) goto out; } err = -EINVAL; - if (!ovl_is_allowed_fs_type(path->dentry)) { + if (ovl_dentry_weird(path->dentry)) { pr_err("overlayfs: filesystem on '%s' not supported\n", name); goto out_put; } @@ -751,13 +802,21 @@ static int ovl_mount_dir(const char *name, struct path *path) if (tmp) { ovl_unescape(tmp); err = ovl_mount_dir_noesc(tmp, path); + + if (!err) + if (ovl_dentry_remote(path->dentry)) { + pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", + tmp); + path_put(path); + err = -EINVAL; + } kfree(tmp); } return err; } static int ovl_lower_dir(const char *name, struct path *path, long *namelen, - int *stack_depth) + int *stack_depth, bool *remote) { int err; struct kstatfs statfs; @@ -774,6 +833,9 @@ static int ovl_lower_dir(const char *name, struct path *path, long *namelen, *namelen = max(*namelen, statfs.f_namelen); *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); + if (ovl_dentry_remote(path->dentry)) + *remote = true; + return 0; out_put: @@ -827,6 +889,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) unsigned int numlower; unsigned int stacklen = 0; unsigned int i; + bool remote = false; int err; err = -ENOMEM; @@ -900,7 +963,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) lower = lowertmp; for (numlower = 0; numlower < stacklen; numlower++) { err = ovl_lower_dir(lower, &stack[numlower], - &ufs->lower_namelen, &sb->s_stack_depth); + &ufs->lower_namelen, &sb->s_stack_depth, + &remote); if (err) goto out_put_lowerpath; @@ -958,7 +1022,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!ufs->upper_mnt) sb->s_flags |= MS_RDONLY; - sb->s_d_op = &ovl_dentry_operations; + if (remote) + sb->s_d_op = &ovl_reval_dentry_operations; + else + sb->s_d_op = &ovl_dentry_operations; err = -ENOMEM; oe = ovl_alloc_entry(numlower); diff --git a/include/acpi/acnames.h b/include/acpi/acnames.h index 273de709495c..b52c0dc4b492 100644 --- a/include/acpi/acnames.h +++ b/include/acpi/acnames.h @@ -51,6 +51,7 @@ #define METHOD_NAME__BBN "_BBN" #define METHOD_NAME__CBA "_CBA" #define METHOD_NAME__CID "_CID" +#define METHOD_NAME__CLS "_CLS" #define METHOD_NAME__CRS "_CRS" #define METHOD_NAME__DDN "_DDN" #define METHOD_NAME__HID "_HID" diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h index a8f344363e77..f56de8c5d844 100644 --- a/include/acpi/acoutput.h +++ b/include/acpi/acoutput.h @@ -294,8 +294,12 @@ /* DEBUG_PRINT functions */ -#define ACPI_DEBUG_PRINT(plist) ACPI_ACTUAL_DEBUG plist -#define ACPI_DEBUG_PRINT_RAW(plist) ACPI_ACTUAL_DEBUG_RAW plist +#ifndef COMPILER_VA_MACRO + +#define ACPI_DEBUG_PRINT(plist) acpi_debug_print plist +#define ACPI_DEBUG_PRINT_RAW(plist) acpi_debug_print_raw plist + +#else /* Helper macros for DEBUG_PRINT */ @@ -315,6 +319,11 @@ ACPI_DO_DEBUG_PRINT (acpi_debug_print_raw, level, line, \ filename, modulename, component, __VA_ARGS__) +#define ACPI_DEBUG_PRINT(plist) ACPI_ACTUAL_DEBUG plist +#define ACPI_DEBUG_PRINT_RAW(plist) ACPI_ACTUAL_DEBUG_RAW plist + +#endif + /* * Function entry tracing * diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index d68f1cd39c49..e8ec18a4a634 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -46,7 +46,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20150515 +#define ACPI_CA_VERSION 0x20150619 #include <acpi/acconfig.h> #include <acpi/actypes.h> @@ -195,9 +195,18 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_do_not_use_xsdt, FALSE); * address. Although ACPICA adheres to the ACPI specification which * requires the use of the corresponding 64-bit address if it is non-zero, * some machines have been found to have a corrupted non-zero 64-bit - * address. Default is TRUE, favor the 32-bit addresses. + * address. Default is FALSE, do not favor the 32-bit addresses. */ -ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_fadt_addresses, TRUE); +ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_fadt_addresses, FALSE); + +/* + * Optionally use 32-bit FACS table addresses. + * It is reported that some platforms fail to resume from system suspending + * if 64-bit FACS table address is selected: + * https://bugzilla.kernel.org/show_bug.cgi?id=74021 + * Default is TRUE, favor the 32-bit addresses. + */ +ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_facs_addresses, TRUE); /* * Optionally truncate I/O addresses to 16 bits. Provides compatibility @@ -220,6 +229,11 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_disable_auto_repair, FALSE); ACPI_INIT_GLOBAL(u8, acpi_gbl_disable_ssdt_table_install, FALSE); /* + * Optionally enable runtime namespace override. + */ +ACPI_INIT_GLOBAL(u8, acpi_gbl_runtime_namespace_override, TRUE); + +/* * We keep track of the latest version of Windows that has been requested by * the BIOS. ACPI 5.0. */ @@ -814,8 +828,12 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_leave_sleep_state(u8 sleep_state)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status - acpi_set_firmware_waking_vector(u32 - physical_address)) + acpi_set_firmware_waking_vectors + (acpi_physical_address physical_address, + acpi_physical_address physical_address64)) +ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status + acpi_set_firmware_waking_vector(u32 + physical_address)) #if ACPI_MACHINE_WIDTH == 64 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_set_firmware_waking_vector64(u64 diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index cb8a6b97ceda..2d5faf508cad 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -65,6 +65,7 @@ #define ACPI_SIG_DSDT "DSDT" /* Differentiated System Description Table */ #define ACPI_SIG_FADT "FACP" /* Fixed ACPI Description Table */ #define ACPI_SIG_FACS "FACS" /* Firmware ACPI Control Structure */ +#define ACPI_SIG_OSDT "OSDT" /* Override System Description Table */ #define ACPI_SIG_PSDT "PSDT" /* Persistent System Description Table */ #define ACPI_SIG_RSDP "RSD PTR " /* Root System Description Pointer */ #define ACPI_SIG_RSDT "RSDT" /* Root System Description Table */ diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 06b61f01ea59..fcd570999f35 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -835,6 +835,17 @@ struct acpi_madt_generic_distributor { u8 reserved2[3]; /* reserved - must be zero */ }; +/* Values for Version field above */ + +enum acpi_madt_gic_version { + ACPI_MADT_GIC_VERSION_NONE = 0, + ACPI_MADT_GIC_VERSION_V1 = 1, + ACPI_MADT_GIC_VERSION_V2 = 2, + ACPI_MADT_GIC_VERSION_V3 = 3, + ACPI_MADT_GIC_VERSION_V4 = 4, + ACPI_MADT_GIC_VERSION_RESERVED = 5 /* 5 and greater are reserved */ +}; + /* 13: Generic MSI Frame (ACPI 5.1) */ struct acpi_madt_generic_msi_frame { diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 370d69d871a0..a948fc586b9b 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -51,8 +51,8 @@ * These tables are not consumed directly by the ACPICA subsystem, but are * included here to support device drivers and the AML disassembler. * - * The tables in this file are defined by third-party specifications, and are - * not defined directly by the ACPI specification itself. + * Generally, the tables in this file are defined by third-party specifications, + * and are not defined directly by the ACPI specification itself. * ******************************************************************************/ @@ -80,6 +80,7 @@ #define ACPI_SIG_SPCR "SPCR" /* Serial Port Console Redirection table */ #define ACPI_SIG_SPMI "SPMI" /* Server Platform Management Interface table */ #define ACPI_SIG_TCPA "TCPA" /* Trusted Computing Platform Alliance table */ +#define ACPI_SIG_TPM2 "TPM2" /* Trusted Platform Module 2.0 H/W interface table */ #define ACPI_SIG_UEFI "UEFI" /* Uefi Boot Optimization Table */ #define ACPI_SIG_VRTC "VRTC" /* Virtual Real Time Clock Table */ #define ACPI_SIG_WAET "WAET" /* Windows ACPI Emulated devices Table */ @@ -1179,20 +1180,85 @@ enum acpi_spmi_interface_types { /******************************************************************************* * * TCPA - Trusted Computing Platform Alliance table - * Version 1 + * Version 2 + * + * Conforms to "TCG ACPI Specification, Family 1.2 and 2.0", + * December 19, 2014 * - * Conforms to "TCG PC Specific Implementation Specification", - * Version 1.1, August 18, 2003 + * NOTE: There are two versions of the table with the same signature -- + * the client version and the server version. * ******************************************************************************/ -struct acpi_table_tcpa { +struct acpi_table_tcpa_client { struct acpi_table_header header; /* Common ACPI table header */ + u16 platform_class; + u32 minimum_log_length; /* Minimum length for the event log area */ + u64 log_address; /* Address of the event log area */ +}; + +struct acpi_table_tcpa_server { + struct acpi_table_header header; /* Common ACPI table header */ + u16 platform_class; u16 reserved; - u32 max_log_length; /* Maximum length for the event log area */ + u64 minimum_log_length; /* Minimum length for the event log area */ u64 log_address; /* Address of the event log area */ + u16 spec_revision; + u8 device_flags; + u8 interrupt_flags; + u8 gpe_number; + u8 reserved2[3]; + u32 global_interrupt; + struct acpi_generic_address address; + u32 reserved3; + struct acpi_generic_address config_address; + u8 group; + u8 bus; /* PCI Bus/Segment/Function numbers */ + u8 device; + u8 function; +}; + +/* Values for device_flags above */ + +#define ACPI_TCPA_PCI_DEVICE (1) +#define ACPI_TCPA_BUS_PNP (1<<1) +#define ACPI_TCPA_ADDRESS_VALID (1<<2) + +/* Values for interrupt_flags above */ + +#define ACPI_TCPA_INTERRUPT_MODE (1) +#define ACPI_TCPA_INTERRUPT_POLARITY (1<<1) +#define ACPI_TCPA_SCI_VIA_GPE (1<<2) +#define ACPI_TCPA_GLOBAL_INTERRUPT (1<<3) + +/******************************************************************************* + * + * TPM2 - Trusted Platform Module (TPM) 2.0 Hardware Interface Table + * Version 4 + * + * Conforms to "TCG ACPI Specification, Family 1.2 and 2.0", + * December 19, 2014 + * + ******************************************************************************/ + +struct acpi_table_tpm2 { + struct acpi_table_header header; /* Common ACPI table header */ + u16 platform_class; + u16 reserved; + u64 control_address; + u32 start_method; + + /* Platform-specific data follows */ }; +/* Values for start_method above */ + +#define ACPI_TPM2_NOT_ALLOWED 0 +#define ACPI_TPM2_START_METHOD 2 +#define ACPI_TPM2_MEMORY_MAPPED 6 +#define ACPI_TPM2_COMMAND_BUFFER 7 +#define ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD 8 + /******************************************************************************* * * UEFI - UEFI Boot optimization Table diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h index 4018986d2a2e..1df891660f43 100644 --- a/include/acpi/actbl3.h +++ b/include/acpi/actbl3.h @@ -51,7 +51,8 @@ * These tables are not consumed directly by the ACPICA subsystem, but are * included here to support device drivers and the AML disassembler. * - * The tables in this file are fully defined within the ACPI specification. + * In general, the tables in this file are fully defined within the ACPI + * specification. * ******************************************************************************/ @@ -69,7 +70,6 @@ #define ACPI_SIG_PMTT "PMTT" /* Platform Memory Topology Table */ #define ACPI_SIG_RASF "RASF" /* RAS Feature table */ #define ACPI_SIG_STAO "STAO" /* Status Override table */ -#define ACPI_SIG_TPM2 "TPM2" /* Trusted Platform Module 2.0 H/W interface table */ #define ACPI_SIG_WPBT "WPBT" /* Windows Platform Binary Table */ #define ACPI_SIG_XENV "XENV" /* Xen Environment table */ @@ -722,36 +722,6 @@ struct acpi_table_stao { /******************************************************************************* * - * TPM2 - Trusted Platform Module (TPM) 2.0 Hardware Interface Table - * Version 3 - * - * Conforms to "TPM 2.0 Hardware Interface Table (TPM2)" 29 November 2011 - * - ******************************************************************************/ - -struct acpi_table_tpm2 { - struct acpi_table_header header; /* Common ACPI table header */ - u32 flags; - u64 control_address; - u32 start_method; -}; - -/* Control area structure (not part of table, pointed to by control_address) */ - -struct acpi_tpm2_control { - u32 reserved; - u32 error; - u32 cancel; - u32 start; - u64 interrupt_control; - u32 command_size; - u64 command_address; - u32 response_size; - u64 response_address; -}; - -/******************************************************************************* - * * WPBT - Windows Platform Environment Table (ACPI 6.0) * Version 1 * diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 63fd7f5e9fb3..c2a41d223162 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -542,14 +542,14 @@ typedef u64 acpi_integer; #define ACPI_COMPARE_NAME(a,b) (*ACPI_CAST_PTR (u32, (a)) == *ACPI_CAST_PTR (u32, (b))) #define ACPI_MOVE_NAME(dest,src) (*ACPI_CAST_PTR (u32, (dest)) = *ACPI_CAST_PTR (u32, (src))) #else -#define ACPI_COMPARE_NAME(a,b) (!ACPI_STRNCMP (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAME_SIZE)) -#define ACPI_MOVE_NAME(dest,src) (ACPI_STRNCPY (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAME_SIZE)) +#define ACPI_COMPARE_NAME(a,b) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAME_SIZE)) +#define ACPI_MOVE_NAME(dest,src) (strncpy (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAME_SIZE)) #endif /* Support for the special RSDP signature (8 characters) */ -#define ACPI_VALIDATE_RSDP_SIG(a) (!ACPI_STRNCMP (ACPI_CAST_PTR (char, (a)), ACPI_SIG_RSDP, 8)) -#define ACPI_MAKE_RSDP_SIG(dest) (ACPI_MEMCPY (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8)) +#define ACPI_VALIDATE_RSDP_SIG(a) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_SIG_RSDP, 8)) +#define ACPI_MAKE_RSDP_SIG(dest) (memcpy (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8)) /******************************************************************************* * @@ -568,6 +568,7 @@ typedef u64 acpi_integer; #define ACPI_NO_ACPI_ENABLE 0x10 #define ACPI_NO_DEVICE_INIT 0x20 #define ACPI_NO_OBJECT_INIT 0x40 +#define ACPI_NO_FACS_INIT 0x80 /* * Initialization state @@ -1140,6 +1141,10 @@ u32 (*acpi_interface_handler) (acpi_string interface_name, u32 supported); #define ACPI_UUID_LENGTH 16 +/* Length of 3-byte PCI class code values when converted back to a string */ + +#define ACPI_PCICLS_STRING_SIZE 7 /* Includes null terminator */ + /* Structures used for device/processor HID, UID, CID, and SUB */ struct acpi_pnp_device_id { @@ -1162,7 +1167,7 @@ struct acpi_device_info { u32 name; /* ACPI object Name */ acpi_object_type type; /* ACPI object Type */ u8 param_count; /* If a method, required parameter count */ - u8 valid; /* Indicates which optional fields are valid */ + u16 valid; /* Indicates which optional fields are valid */ u8 flags; /* Miscellaneous info */ u8 highest_dstates[4]; /* _sx_d values: 0xFF indicates not valid */ u8 lowest_dstates[5]; /* _sx_w values: 0xFF indicates not valid */ @@ -1171,6 +1176,7 @@ struct acpi_device_info { struct acpi_pnp_device_id hardware_id; /* _HID value */ struct acpi_pnp_device_id unique_id; /* _UID value */ struct acpi_pnp_device_id subsystem_id; /* _SUB value */ + struct acpi_pnp_device_id class_code; /* _CLS value */ struct acpi_pnp_device_id_list compatible_id_list; /* _CID list <must be last> */ }; @@ -1180,14 +1186,15 @@ struct acpi_device_info { /* Flags for Valid field above (acpi_get_object_info) */ -#define ACPI_VALID_STA 0x01 -#define ACPI_VALID_ADR 0x02 -#define ACPI_VALID_HID 0x04 -#define ACPI_VALID_UID 0x08 -#define ACPI_VALID_SUB 0x10 -#define ACPI_VALID_CID 0x20 -#define ACPI_VALID_SXDS 0x40 -#define ACPI_VALID_SXWS 0x80 +#define ACPI_VALID_STA 0x0001 +#define ACPI_VALID_ADR 0x0002 +#define ACPI_VALID_HID 0x0004 +#define ACPI_VALID_UID 0x0008 +#define ACPI_VALID_SUB 0x0010 +#define ACPI_VALID_CID 0x0020 +#define ACPI_VALID_CLS 0x0040 +#define ACPI_VALID_SXDS 0x0100 +#define ACPI_VALID_SXWS 0x0200 /* Flags for _STA return value (current_status above) */ diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h index 073997d729e9..3cedd43943f4 100644 --- a/include/acpi/platform/acenv.h +++ b/include/acpi/platform/acenv.h @@ -346,29 +346,6 @@ /* We will be linking to the standard Clib functions */ -#define ACPI_STRSTR(s1,s2) strstr((s1), (s2)) -#define ACPI_STRCHR(s1,c) strchr((s1), (c)) -#define ACPI_STRLEN(s) (acpi_size) strlen((s)) -#define ACPI_STRCPY(d,s) (void) strcpy((d), (s)) -#define ACPI_STRNCPY(d,s,n) (void) strncpy((d), (s), (acpi_size)(n)) -#define ACPI_STRNCMP(d,s,n) strncmp((d), (s), (acpi_size)(n)) -#define ACPI_STRCMP(d,s) strcmp((d), (s)) -#define ACPI_STRCAT(d,s) (void) strcat((d), (s)) -#define ACPI_STRNCAT(d,s,n) strncat((d), (s), (acpi_size)(n)) -#define ACPI_STRTOUL(d,s,n) strtoul((d), (s), (acpi_size)(n)) -#define ACPI_MEMCMP(s1,s2,n) memcmp((const char *)(s1), (const char *)(s2), (acpi_size)(n)) -#define ACPI_MEMCPY(d,s,n) (void) memcpy((d), (s), (acpi_size)(n)) -#define ACPI_MEMSET(d,s,n) (void) memset((d), (s), (acpi_size)(n)) - -#define ACPI_TOUPPER(i) toupper((int) (i)) -#define ACPI_TOLOWER(i) tolower((int) (i)) -#define ACPI_IS_XDIGIT(i) isxdigit((int) (i)) -#define ACPI_IS_DIGIT(i) isdigit((int) (i)) -#define ACPI_IS_SPACE(i) isspace((int) (i)) -#define ACPI_IS_UPPER(i) isupper((int) (i)) -#define ACPI_IS_PRINT(i) isprint((int) (i)) -#define ACPI_IS_ALPHA(i) isalpha((int) (i)) - #else /****************************************************************************** @@ -406,22 +383,6 @@ typedef char *va_list; /* Use the local (ACPICA) definitions of the clib functions */ -#define ACPI_STRSTR(s1,s2) acpi_ut_strstr ((s1), (s2)) -#define ACPI_STRCHR(s1,c) acpi_ut_strchr ((s1), (c)) -#define ACPI_STRLEN(s) (acpi_size) acpi_ut_strlen ((s)) -#define ACPI_STRCPY(d,s) (void) acpi_ut_strcpy ((d), (s)) -#define ACPI_STRNCPY(d,s,n) (void) acpi_ut_strncpy ((d), (s), (acpi_size)(n)) -#define ACPI_STRNCMP(d,s,n) acpi_ut_strncmp ((d), (s), (acpi_size)(n)) -#define ACPI_STRCMP(d,s) acpi_ut_strcmp ((d), (s)) -#define ACPI_STRCAT(d,s) (void) acpi_ut_strcat ((d), (s)) -#define ACPI_STRNCAT(d,s,n) acpi_ut_strncat ((d), (s), (acpi_size)(n)) -#define ACPI_STRTOUL(d,s,n) acpi_ut_strtoul ((d), (s), (acpi_size)(n)) -#define ACPI_MEMCMP(s1,s2,n) acpi_ut_memcmp((const char *)(s1), (const char *)(s2), (acpi_size)(n)) -#define ACPI_MEMCPY(d,s,n) (void) acpi_ut_memcpy ((d), (s), (acpi_size)(n)) -#define ACPI_MEMSET(d,v,n) (void) acpi_ut_memset ((d), (v), (acpi_size)(n)) -#define ACPI_TOUPPER(c) acpi_ut_to_upper ((int) (c)) -#define ACPI_TOLOWER(c) acpi_ut_to_lower ((int) (c)) - #endif /* ACPI_USE_SYSTEM_CLIBRARY */ #ifndef ACPI_FILE diff --git a/include/acpi/platform/acenvex.h b/include/acpi/platform/acenvex.h index 14dc6f68ca18..0a7dc8e583b1 100644 --- a/include/acpi/platform/acenvex.h +++ b/include/acpi/platform/acenvex.h @@ -56,6 +56,12 @@ #if defined(_LINUX) || defined(__linux__) #include <acpi/platform/aclinuxex.h> +#elif defined(_AED_EFI) +#include "acefiex.h" + +#elif defined(_GNU_EFI) +#include "acefiex.h" + #elif defined(__DragonFly__) #include "acdragonflyex.h" diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h index f54de0a63558..5457a06cb528 100644 --- a/include/acpi/platform/acgcc.h +++ b/include/acpi/platform/acgcc.h @@ -75,4 +75,8 @@ #undef strchr #endif +/* GCC supports __VA_ARGS__ in macros */ + +#define COMPILER_VA_MACRO 1 + #endif /* __ACGCC_H__ */ diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 30f92cefaa72..9ebee53d3bf5 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -43,9 +43,9 @@ struct ceph_options { int flags; struct ceph_fsid fsid; struct ceph_entity_addr my_addr; - int mount_timeout; - int osd_idle_ttl; - int osd_keepalive_timeout; + unsigned long mount_timeout; /* jiffies */ + unsigned long osd_idle_ttl; /* jiffies */ + unsigned long osd_keepalive_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -63,9 +63,9 @@ struct ceph_options { /* * defaults */ -#define CEPH_MOUNT_TIMEOUT_DEFAULT 60 -#define CEPH_OSD_KEEPALIVE_DEFAULT 5 -#define CEPH_OSD_IDLE_TTL_DEFAULT 60 +#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) +#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) @@ -93,13 +93,9 @@ enum { CEPH_MOUNT_SHUTDOWN, }; -/* - * subtract jiffies - */ -static inline unsigned long time_sub(unsigned long a, unsigned long b) +static inline unsigned long ceph_timeout_jiffies(unsigned long timeout) { - BUG_ON(time_after(b, a)); - return (long)a - (long)b; + return timeout ?: MAX_SCHEDULE_TIMEOUT; } struct ceph_mds_client; @@ -178,6 +174,7 @@ static inline int calc_pages_for(u64 off, u64 len) extern struct kmem_cache *ceph_inode_cachep; extern struct kmem_cache *ceph_cap_cachep; +extern struct kmem_cache *ceph_cap_flush_cachep; extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 61b19c46bdb3..7506b485bb6d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -249,7 +249,7 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); extern void osd_req_op_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode); + unsigned int which, u16 opcode, u32 flags); extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, unsigned int which, diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 48a1a7d100f1..48b49305716b 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -1,7 +1,11 @@ #ifndef CEPH_CRUSH_CRUSH_H #define CEPH_CRUSH_CRUSH_H -#include <linux/types.h> +#ifdef __KERNEL__ +# include <linux/types.h> +#else +# include "crush_compat.h" +#endif /* * CRUSH is a pseudo-random data distribution algorithm that @@ -20,7 +24,11 @@ #define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */ #define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */ +#define CRUSH_MAX_RULESET (1<<8) /* max crush ruleset number */ +#define CRUSH_MAX_RULES CRUSH_MAX_RULESET /* should be the same as max rulesets */ +#define CRUSH_MAX_DEVICE_WEIGHT (100u * 0x10000u) +#define CRUSH_MAX_BUCKET_WEIGHT (65535u * 0x10000u) #define CRUSH_ITEM_UNDEF 0x7ffffffe /* undefined result (internal use only) */ #define CRUSH_ITEM_NONE 0x7fffffff /* no result */ @@ -108,6 +116,15 @@ enum { }; extern const char *crush_bucket_alg_name(int alg); +/* + * although tree was a legacy algorithm, it has been buggy, so + * exclude it. + */ +#define CRUSH_LEGACY_ALLOWED_BUCKET_ALGS ( \ + (1 << CRUSH_BUCKET_UNIFORM) | \ + (1 << CRUSH_BUCKET_LIST) | \ + (1 << CRUSH_BUCKET_STRAW)) + struct crush_bucket { __s32 id; /* this'll be negative */ __u16 type; /* non-zero; type=0 is reserved for devices */ @@ -174,7 +191,7 @@ struct crush_map { /* choose local attempts using a fallback permutation before * re-descent */ __u32 choose_local_fallback_tries; - /* choose attempts before giving up */ + /* choose attempts before giving up */ __u32 choose_total_tries; /* attempt chooseleaf inner descent once for firstn mode; on * reject retry outer descent. Note that this does *not* @@ -187,6 +204,25 @@ struct crush_map { * that want to limit reshuffling, a value of 3 or 4 will make the * mappings line up a bit better with previous mappings. */ __u8 chooseleaf_vary_r; + +#ifndef __KERNEL__ + /* + * version 0 (original) of straw_calc has various flaws. version 1 + * fixes a few of them. + */ + __u8 straw_calc_version; + + /* + * allowed bucket algs is a bitmask, here the bit positions + * are CRUSH_BUCKET_*. note that these are *bits* and + * CRUSH_BUCKET_* values are not, so we need to or together (1 + * << CRUSH_BUCKET_WHATEVER). The 0th bit is not used to + * minimize confusion (bucket type values start at 1). + */ + __u32 allowed_bucket_algs; + + __u32 *choose_tries; +#endif }; diff --git a/include/linux/crush/hash.h b/include/linux/crush/hash.h index 91e884230d5d..d1d90258242e 100644 --- a/include/linux/crush/hash.h +++ b/include/linux/crush/hash.h @@ -1,6 +1,12 @@ #ifndef CEPH_CRUSH_HASH_H #define CEPH_CRUSH_HASH_H +#ifdef __KERNEL__ +# include <linux/types.h> +#else +# include "crush_compat.h" +#endif + #define CRUSH_HASH_RJENKINS1 0 #define CRUSH_HASH_DEFAULT CRUSH_HASH_RJENKINS1 diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index eab367446eea..5dfd5b1125d2 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -8,7 +8,7 @@ * LGPL2 */ -#include <linux/crush/crush.h> +#include "crush.h" extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size); extern int crush_do_rule(const struct crush_map *map, diff --git a/include/linux/device.h b/include/linux/device.h index 00ac57c26615..5a31bf3a4024 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1300,4 +1300,26 @@ static void __exit __driver##_exit(void) \ } \ module_exit(__driver##_exit); +/** + * builtin_driver() - Helper macro for drivers that don't do anything + * special in init and have no exit. This eliminates some boilerplate. + * Each driver may only use this macro once, and calling it replaces + * device_initcall (or in some cases, the legacy __initcall). This is + * meant to be a direct parallel of module_driver() above but without + * the __exit stuff that is not used for builtin cases. + * + * @__driver: driver name + * @__register: register function for this driver type + * @...: Additional arguments to be passed to __register + * + * Use this macro to construct bus specific macros for registering + * drivers, and do not use it on its own. + */ +#define builtin_driver(__driver, __register, ...) \ +static int __init __driver##_init(void) \ +{ \ + return __register(&(__driver) , ##__VA_ARGS__); \ +} \ +device_initcall(__driver##_init); + #endif /* _DEVICE_H_ */ diff --git a/include/linux/init.h b/include/linux/init.h index 21b6d768edd7..7c68c36d3fd8 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -91,14 +91,6 @@ #define __exit __section(.exit.text) __exitused __cold notrace -/* temporary, until all users are removed */ -#define __cpuinit -#define __cpuinitdata -#define __cpuinitconst -#define __cpuexit -#define __cpuexitdata -#define __cpuexitconst - /* Used for MEMORY_HOTPLUG */ #define __meminit __section(.meminit.text) __cold notrace #define __meminitdata __section(.meminit.data) @@ -116,9 +108,6 @@ #define __INITRODATA .section ".init.rodata","a",%progbits #define __FINITDATA .previous -/* temporary, until all users are removed */ -#define __CPUINIT - #define __MEMINIT .section ".meminit.text", "ax" #define __MEMINITDATA .section ".meminit.data", "aw" #define __MEMINITRODATA .section ".meminit.rodata", "a" diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 32201c269890..b8e72aad919c 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -500,6 +500,7 @@ enum { NFSPROC4_CLNT_SEEK, NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, + NFSPROC4_CLNT_LAYOUTSTATS, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b95f914ce083..f91b5ade30c9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -219,6 +219,7 @@ struct nfs_inode { #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ +#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5e1273d4de14..a2ea1491d3df 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -237,5 +237,6 @@ struct nfs_server { #define NFS_CAP_SEEK (1U << 19) #define NFS_CAP_ALLOCATE (1U << 20) #define NFS_CAP_DEALLOCATE (1U << 21) +#define NFS_CAP_LAYOUTSTATS (1U << 22) #endif diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 3eb072dbce83..f2f650f136ee 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -67,7 +67,6 @@ struct nfs_rw_ops { const fmode_t rw_mode; struct nfs_pgio_header *(*rw_alloc_header)(void); void (*rw_free_header)(struct nfs_pgio_header *); - void (*rw_release)(struct nfs_pgio_header *); int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *, struct inode *); void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 93ab6071bbe9..7bbe50504211 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -316,6 +316,49 @@ struct nfs4_layoutreturn { int rpc_status; }; +#define PNFS_LAYOUTSTATS_MAXSIZE 256 + +struct nfs42_layoutstat_args; +struct nfs42_layoutstat_devinfo; +typedef void (*layoutstats_encode_t)(struct xdr_stream *, + struct nfs42_layoutstat_args *, + struct nfs42_layoutstat_devinfo *); + +/* Per file per deviceid layoutstats */ +struct nfs42_layoutstat_devinfo { + struct nfs4_deviceid dev_id; + __u64 offset; + __u64 length; + __u64 read_count; + __u64 read_bytes; + __u64 write_count; + __u64 write_bytes; + __u32 layout_type; + layoutstats_encode_t layoutstats_encode; + void *layout_private; +}; + +struct nfs42_layoutstat_args { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + struct inode *inode; + nfs4_stateid stateid; + int num_dev; + struct nfs42_layoutstat_devinfo *devinfo; +}; + +struct nfs42_layoutstat_res { + struct nfs4_sequence_res seq_res; + int num_dev; + int rpc_status; +}; + +struct nfs42_layoutstat_data { + struct inode *inode; + struct nfs42_layoutstat_args args; + struct nfs42_layoutstat_res res; +}; + struct stateowner_id { __u64 create_time; __u32 uniquifier; @@ -984,17 +1027,14 @@ struct nfs4_readlink_res { struct nfs4_sequence_res seq_res; }; -#define NFS4_SETCLIENTID_NAMELEN (127) struct nfs4_setclientid { const nfs4_verifier * sc_verifier; - unsigned int sc_name_len; - char sc_name[NFS4_SETCLIENTID_NAMELEN + 1]; u32 sc_prog; unsigned int sc_netid_len; char sc_netid[RPCBIND_MAXNETIDLEN + 1]; unsigned int sc_uaddr_len; char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; - u32 sc_cb_ident; + struct nfs_client *sc_clnt; struct rpc_cred *sc_cred; }; @@ -1142,12 +1182,9 @@ struct nfs41_state_protection { struct nfs4_op_map allow; }; -#define NFS4_EXCHANGE_ID_LEN (48) struct nfs41_exchange_id_args { struct nfs_client *client; nfs4_verifier *verifier; - unsigned int id_len; - char id[NFS4_EXCHANGE_ID_LEN]; u32 flags; struct nfs41_state_protection state_protect; }; diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 58f1e75ba105..bba08f44cc97 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -222,6 +222,15 @@ static inline void platform_set_drvdata(struct platform_device *pdev, module_driver(__platform_driver, platform_driver_register, \ platform_driver_unregister) +/* builtin_platform_driver() - Helper macro for builtin drivers that + * don't do anything special in driver init. This eliminates some + * boilerplate. Each driver may only use this macro once, and + * calling it replaces device_initcall(). Note this is meant to be + * a parallel of module_platform_driver() above, but w/o _exit stuff. + */ +#define builtin_platform_driver(__platform_driver) \ + builtin_driver(__platform_driver, platform_driver_register) + /* module_platform_driver_probe() - Helper macro for drivers that don't do * anything special in module init/exit. This eliminates a lot of * boilerplate. Each module may only use this macro once, and @@ -240,6 +249,20 @@ static void __exit __platform_driver##_exit(void) \ } \ module_exit(__platform_driver##_exit); +/* builtin_platform_driver_probe() - Helper macro for drivers that don't do + * anything special in device init. This eliminates some boilerplate. Each + * driver may only use this macro once, and using it replaces device_initcall. + * This is meant to be a parallel of module_platform_driver_probe above, but + * without the __exit parts. + */ +#define builtin_platform_driver_probe(__platform_driver, __platform_probe) \ +static int __init __platform_driver##_init(void) \ +{ \ + return platform_driver_probe(&(__platform_driver), \ + __platform_probe); \ +} \ +device_initcall(__platform_driver##_init); \ + #define platform_create_bundle(driver, probe, res, n_res, data, size) \ __platform_create_bundle(driver, probe, res, n_res, data, size, THIS_MODULE) extern struct platform_device *__platform_create_bundle( diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 2ca67b55e0fe..8df43c9f11dc 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -37,7 +37,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); -int bc_send(struct rpc_rqst *req); /* * Determine if a shared backchannel is in use diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 598ba80ec30c..131032f15cc1 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -56,6 +56,7 @@ struct rpc_clnt { struct rpc_rtt * cl_rtt; /* RTO estimator data */ const struct rpc_timeout *cl_timeout; /* Timeout strategy */ + atomic_t cl_swapper; /* swapfile count */ int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME+1]; struct rpc_pipe_dir_head cl_pipedir_objects; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 5f1e6bd4c316..d703f0ef37d8 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -205,8 +205,7 @@ struct rpc_wait_queue { */ struct rpc_task *rpc_new_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_task(const struct rpc_task_setup *); -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, - const struct rpc_call_ops *ops); +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); void rpc_exit_task(struct rpc_task *); @@ -269,4 +268,20 @@ static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, } #endif +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +int rpc_clnt_swap_activate(struct rpc_clnt *clnt); +void rpc_clnt_swap_deactivate(struct rpc_clnt *clnt); +#else +static inline int +rpc_clnt_swap_activate(struct rpc_clnt *clnt) +{ + return -EINVAL; +} + +static inline void +rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) +{ +} +#endif /* CONFIG_SUNRPC_SWAP */ + #endif /* _LINUX_SUNRPC_SCHED_H_ */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8b93ef53df3c..0fb9acbb4780 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -133,6 +133,9 @@ struct rpc_xprt_ops { void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); + int (*enable_swap)(struct rpc_xprt *xprt); + void (*disable_swap)(struct rpc_xprt *xprt); + void (*inject_disconnect)(struct rpc_xprt *xprt); }; /* @@ -180,7 +183,7 @@ struct rpc_xprt { atomic_t num_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char resvport : 1; /* use a reserved port */ - unsigned int swapper; /* we're swapping over this + atomic_t swapper; /* we're swapping over this transport */ unsigned int bind_index; /* bind function index */ @@ -212,7 +215,8 @@ struct rpc_xprt { #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ /* process the callback */ - unsigned int bc_alloc_count; /* Total number of preallocs */ + int bc_alloc_count; /* Total number of preallocs */ + atomic_t bc_free_slots; spinlock_t bc_pa_lock; /* Protects the preallocated * items */ struct list_head bc_pa_list; /* List of preallocated @@ -241,6 +245,7 @@ struct rpc_xprt { const char *address_strings[RPC_DISPLAY_MAX]; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct dentry *debugfs; /* debugfs directory */ + atomic_t inject_disconnect; #endif }; @@ -327,6 +332,18 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * return p + xprt->tsh_size; } +static inline int +xprt_enable_swap(struct rpc_xprt *xprt) +{ + return xprt->ops->enable_swap(xprt); +} + +static inline void +xprt_disable_swap(struct rpc_xprt *xprt) +{ + xprt->ops->disable_swap(xprt); +} + /* * Transport switch helper functions */ @@ -345,7 +362,6 @@ void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); -int xs_swapper(struct rpc_xprt *xprt, int enable); bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); void xprt_unlock_connect(struct rpc_xprt *, void *); @@ -431,6 +447,23 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) return test_and_set_bit(XPRT_BINDING, &xprt->state); } +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +extern unsigned int rpc_inject_disconnect; +static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) +{ + if (!rpc_inject_disconnect) + return; + if (atomic_dec_return(&xprt->inject_disconnect)) + return; + atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); + xprt->ops->inject_disconnect(xprt); +} +#else +static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) +{ +} +#endif + #endif /* __KERNEL__*/ #endif /* _LINUX_SUNRPC_XPRT_H */ diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index c984c85981ea..b17613052cc3 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -56,7 +56,8 @@ #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ -/* memory registration strategies */ +/* Memory registration strategies, by number. + * This is part of a kernel / user space API. Do not remove. */ enum rpcrdma_memreg { RPCRDMA_BOUNCEBUFFERS = 0, RPCRDMA_REGISTER, diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index d3f4832db289..b6fce900a833 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -313,6 +313,9 @@ struct drm_amdgpu_gem_op { #define AMDGPU_VA_OP_MAP 1 #define AMDGPU_VA_OP_UNMAP 2 +/* Delay the page table update till the next CS */ +#define AMDGPU_VM_DELAY_UPDATE (1 << 0) + /* Mapping flags */ /* readable mapping */ #define AMDGPU_VM_PAGE_READABLE (1 << 1) @@ -348,6 +351,7 @@ struct drm_amdgpu_gem_va { #define AMDGPU_CHUNK_ID_IB 0x01 #define AMDGPU_CHUNK_ID_FENCE 0x02 +#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 struct drm_amdgpu_cs_chunk { uint32_t chunk_id; @@ -399,6 +403,14 @@ struct drm_amdgpu_cs_chunk_ib { uint32_t ring; }; +struct drm_amdgpu_cs_chunk_dep { + uint32_t ip_type; + uint32_t ip_instance; + uint32_t ring; + uint32_t ctx_id; + uint64_t handle; +}; + struct drm_amdgpu_cs_chunk_fence { uint32_t handle; uint32_t offset; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 25084a052a1e..c9aca042e61d 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -755,4 +755,7 @@ struct fuse_notify_retrieve_in { uint64_t dummy4; }; +/* Device ioctls: */ +#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) + #endif /* _LINUX_FUSE_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 60c302cfb4d3..43c4c920f30a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -137,7 +137,7 @@ endif ifneq ($(wildcard $(obj)/.x509.list),) ifneq ($(shell cat $(obj)/.x509.list),$(X509_CERTIFICATES)) -$(info X.509 certificate list changed) +$(warning X.509 certificate list changed to "$(X509_CERTIFICATES)" from "$(shell cat $(obj)/.x509.list)") $(shell rm $(obj)/.x509.list) endif endif diff --git a/lib/Makefile b/lib/Makefile index ff37c8c2f7b2..6897b527581a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -45,6 +45,9 @@ CFLAGS_kobject.o += -DDEBUG CFLAGS_kobject_uevent.o += -DDEBUG endif +obj-$(CONFIG_DEBUG_INFO_REDUCED) += debug_info.o +CFLAGS_debug_info.o += $(call cc-option, -femit-struct-debug-detailed=any) + obj-$(CONFIG_GENERIC_IOMAP) += iomap.o obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o diff --git a/lib/debug_info.c b/lib/debug_info.c new file mode 100644 index 000000000000..2edbe27517ed --- /dev/null +++ b/lib/debug_info.c @@ -0,0 +1,27 @@ +/* + * This file exists solely to ensure debug information for some core + * data structures is included in the final image even for + * CONFIG_DEBUG_INFO_REDUCED. Please do not add actual code. However, + * adding appropriate #includes is fine. + */ +#include <stdarg.h> + +#include <linux/cred.h> +#include <linux/crypto.h> +#include <linux/dcache.h> +#include <linux/device.h> +#include <linux/fs.h> +#include <linux/fscache-cache.h> +#include <linux/io.h> +#include <linux/kallsyms.h> +#include <linux/kernel.h> +#include <linux/kobject.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <net/addrconf.h> +#include <net/sock.h> +#include <net/tcp.h> diff --git a/lib/list_sort.c b/lib/list_sort.c index b29015102698..3fe401067e20 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -289,5 +289,5 @@ exit: kfree(elts); return err; } -module_init(list_sort_test); +late_initcall(list_sort_test); #endif /* CONFIG_TEST_LIST_SORT */ diff --git a/mm/nommu.c b/mm/nommu.c index 05e7447d960b..58ea3643b9e9 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -2085,7 +2085,7 @@ static int __meminit init_user_reserve(void) sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); return 0; } -module_init(init_user_reserve) +subsys_initcall(init_user_reserve); /* * Initialise sysctl_admin_reserve_kbytes. @@ -2106,4 +2106,4 @@ static int __meminit init_admin_reserve(void) sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); return 0; } -module_init(init_admin_reserve) +subsys_initcall(init_admin_reserve); diff --git a/mm/page_owner.c b/mm/page_owner.c index 0993f5f36b01..bd5f842b56d2 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -310,4 +310,4 @@ static int __init pageowner_init(void) return 0; } -module_init(pageowner_init) +late_initcall(pageowner_init) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 79e8f71aef5b..cb7db320dd27 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -352,8 +352,8 @@ ceph_parse_options(char *options, const char *dev_name, /* start with defaults */ opt->flags = CEPH_OPT_DEFAULT; opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; - opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ - opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ + opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; + opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* get mon ip(s) */ /* ip1[:port1][,ip2[:port2]...] */ @@ -439,13 +439,32 @@ ceph_parse_options(char *options, const char *dev_name, pr_warn("ignoring deprecated osdtimeout option\n"); break; case Opt_osdkeepalivetimeout: - opt->osd_keepalive_timeout = intval; + /* 0 isn't well defined right now, reject it */ + if (intval < 1 || intval > INT_MAX / 1000) { + pr_err("osdkeepalive out of range\n"); + err = -EINVAL; + goto out; + } + opt->osd_keepalive_timeout = + msecs_to_jiffies(intval * 1000); break; case Opt_osd_idle_ttl: - opt->osd_idle_ttl = intval; + /* 0 isn't well defined right now, reject it */ + if (intval < 1 || intval > INT_MAX / 1000) { + pr_err("osd_idle_ttl out of range\n"); + err = -EINVAL; + goto out; + } + opt->osd_idle_ttl = msecs_to_jiffies(intval * 1000); break; case Opt_mount_timeout: - opt->mount_timeout = intval; + /* 0 is "wait forever" (i.e. infinite timeout) */ + if (intval < 0 || intval > INT_MAX / 1000) { + pr_err("mount_timeout out of range\n"); + err = -EINVAL; + goto out; + } + opt->mount_timeout = msecs_to_jiffies(intval * 1000); break; case Opt_share: @@ -512,12 +531,14 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) seq_puts(m, "notcp_nodelay,"); if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) - seq_printf(m, "mount_timeout=%d,", opt->mount_timeout); + seq_printf(m, "mount_timeout=%d,", + jiffies_to_msecs(opt->mount_timeout) / 1000); if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) - seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl); + seq_printf(m, "osd_idle_ttl=%d,", + jiffies_to_msecs(opt->osd_idle_ttl) / 1000); if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) seq_printf(m, "osdkeepalivetimeout=%d,", - opt->osd_keepalive_timeout); + jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000); /* drop redundant comma */ if (m->count != pos) @@ -626,8 +647,8 @@ static int have_mon_and_osd_map(struct ceph_client *client) */ int __ceph_open_session(struct ceph_client *client, unsigned long started) { - int err; - unsigned long timeout = client->options->mount_timeout * HZ; + unsigned long timeout = client->options->mount_timeout; + long err; /* open session, and wait for mon and osd maps */ err = ceph_monc_open_session(&client->monc); @@ -635,16 +656,15 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started) return err; while (!have_mon_and_osd_map(client)) { - err = -EIO; if (timeout && time_after_eq(jiffies, started + timeout)) - return err; + return -ETIMEDOUT; /* wait */ dout("mount waiting for mon_map\n"); err = wait_event_interruptible_timeout(client->auth_wq, have_mon_and_osd_map(client) || (client->auth_err < 0), - timeout); - if (err == -EINTR || err == -ERESTARTSYS) + ceph_timeout_jiffies(timeout)); + if (err < 0) return err; if (client->auth_err < 0) return client->auth_err; @@ -721,5 +741,5 @@ module_exit(exit_ceph_lib); MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); -MODULE_DESCRIPTION("Ceph filesystem for Linux"); +MODULE_DESCRIPTION("Ceph core library"); MODULE_LICENSE("GPL"); diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c index 9d84ce4ea0df..80d7c3a97cb8 100644 --- a/net/ceph/crush/crush.c +++ b/net/ceph/crush/crush.c @@ -1,15 +1,11 @@ - #ifdef __KERNEL__ # include <linux/slab.h> +# include <linux/crush/crush.h> #else -# include <stdlib.h> -# include <assert.h> -# define kfree(x) do { if (x) free(x); } while (0) -# define BUG_ON(x) assert(!(x)) +# include "crush_compat.h" +# include "crush.h" #endif -#include <linux/crush/crush.h> - const char *crush_bucket_alg_name(int alg) { switch (alg) { @@ -134,6 +130,9 @@ void crush_destroy(struct crush_map *map) kfree(map->rules); } +#ifndef __KERNEL__ + kfree(map->choose_tries); +#endif kfree(map); } diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h index 6192c7fc958c..aae534c901a4 100644 --- a/net/ceph/crush/crush_ln_table.h +++ b/net/ceph/crush/crush_ln_table.h @@ -10,20 +10,20 @@ * */ -#if defined(__linux__) -#include <linux/types.h> -#elif defined(__FreeBSD__) -#include <sys/types.h> -#endif - #ifndef CEPH_CRUSH_LN_H #define CEPH_CRUSH_LN_H +#ifdef __KERNEL__ +# include <linux/types.h> +#else +# include "crush_compat.h" +#endif -// RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0) -// RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0) - -static int64_t __RH_LH_tbl[128*2+2] = { +/* + * RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0) + * RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0) + */ +static __s64 __RH_LH_tbl[128*2+2] = { 0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll, 0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all, 0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll, @@ -89,11 +89,12 @@ static int64_t __RH_LH_tbl[128*2+2] = { 0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll, 0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll, 0x0000800000000000ll, 0x0000ffff00000000ll, - }; - +}; - // LL_tbl[k] = 2^48*log2(1.0+k/2^15); -static int64_t __LL_tbl[256] = { +/* + * LL_tbl[k] = 2^48*log2(1.0+k/2^15) + */ +static __s64 __LL_tbl[256] = { 0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull, 0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull, 0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull, @@ -160,7 +161,4 @@ static int64_t __LL_tbl[256] = { 0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull, }; - - - #endif diff --git a/net/ceph/crush/hash.c b/net/ceph/crush/hash.c index 5bb63e37a8a1..ed123af49eba 100644 --- a/net/ceph/crush/hash.c +++ b/net/ceph/crush/hash.c @@ -1,6 +1,8 @@ - -#include <linux/types.h> -#include <linux/crush/hash.h> +#ifdef __KERNEL__ +# include <linux/crush/hash.h> +#else +# include "hash.h" +#endif /* * Robert Jenkins' function for mixing 32-bit values diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 5b47736d27d9..393bfb22d5bb 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -1,27 +1,31 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2015 Intel Corporation All Rights Reserved + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ #ifdef __KERNEL__ # include <linux/string.h> # include <linux/slab.h> # include <linux/bug.h> # include <linux/kernel.h> -# ifndef dprintk -# define dprintk(args...) -# endif +# include <linux/crush/crush.h> +# include <linux/crush/hash.h> #else -# include <string.h> -# include <stdio.h> -# include <stdlib.h> -# include <assert.h> -# define BUG_ON(x) assert(!(x)) -# define dprintk(args...) /* printf(args) */ -# define kmalloc(x, f) malloc(x) -# define kfree(x) free(x) +# include "crush_compat.h" +# include "crush.h" +# include "hash.h" #endif - -#include <linux/crush/crush.h> -#include <linux/crush/hash.h> #include "crush_ln_table.h" +#define dprintk(args...) /* printf(args) */ + /* * Implement the core CRUSH mapping algorithm. */ @@ -139,7 +143,7 @@ static int bucket_list_choose(struct crush_bucket_list *bucket, int i; for (i = bucket->h.size-1; i >= 0; i--) { - __u64 w = crush_hash32_4(bucket->h.hash,x, bucket->h.items[i], + __u64 w = crush_hash32_4(bucket->h.hash, x, bucket->h.items[i], r, bucket->h.id); w &= 0xffff; dprintk("list_choose i=%d x=%d r=%d item %d weight %x " @@ -238,43 +242,46 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket, return bucket->h.items[high]; } -// compute 2^44*log2(input+1) -uint64_t crush_ln(unsigned xin) +/* compute 2^44*log2(input+1) */ +static __u64 crush_ln(unsigned int xin) { - unsigned x=xin, x1; - int iexpon, index1, index2; - uint64_t RH, LH, LL, xl64, result; + unsigned int x = xin, x1; + int iexpon, index1, index2; + __u64 RH, LH, LL, xl64, result; - x++; + x++; - // normalize input - iexpon = 15; - while(!(x&0x18000)) { x<<=1; iexpon--; } + /* normalize input */ + iexpon = 15; + while (!(x & 0x18000)) { + x <<= 1; + iexpon--; + } - index1 = (x>>8)<<1; - // RH ~ 2^56/index1 - RH = __RH_LH_tbl[index1 - 256]; - // LH ~ 2^48 * log2(index1/256) - LH = __RH_LH_tbl[index1 + 1 - 256]; + index1 = (x >> 8) << 1; + /* RH ~ 2^56/index1 */ + RH = __RH_LH_tbl[index1 - 256]; + /* LH ~ 2^48 * log2(index1/256) */ + LH = __RH_LH_tbl[index1 + 1 - 256]; - // RH*x ~ 2^48 * (2^15 + xf), xf<2^8 - xl64 = (int64_t)x * RH; - xl64 >>= 48; - x1 = xl64; + /* RH*x ~ 2^48 * (2^15 + xf), xf<2^8 */ + xl64 = (__s64)x * RH; + xl64 >>= 48; + x1 = xl64; - result = iexpon; - result <<= (12 + 32); + result = iexpon; + result <<= (12 + 32); - index2 = x1 & 0xff; - // LL ~ 2^48*log2(1.0+index2/2^15) - LL = __LL_tbl[index2]; + index2 = x1 & 0xff; + /* LL ~ 2^48*log2(1.0+index2/2^15) */ + LL = __LL_tbl[index2]; - LH = LH + LL; + LH = LH + LL; - LH >>= (48-12 - 32); - result += LH; + LH >>= (48 - 12 - 32); + result += LH; - return result; + return result; } @@ -290,9 +297,9 @@ uint64_t crush_ln(unsigned xin) static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket, int x, int r) { - unsigned i, high = 0; - unsigned u; - unsigned w; + unsigned int i, high = 0; + unsigned int u; + unsigned int w; __s64 ln, draw, high_draw = 0; for (i = 0; i < bucket->h.size; i++) { @@ -567,6 +574,10 @@ reject: out[outpos] = item; outpos++; count--; +#ifndef __KERNEL__ + if (map->choose_tries && ftotal <= map->choose_total_tries) + map->choose_tries[ftotal]++; +#endif } dprintk("CHOOSE returns %d\n", outpos); @@ -610,6 +621,20 @@ static void crush_choose_indep(const struct crush_map *map, } for (ftotal = 0; left > 0 && ftotal < tries; ftotal++) { +#ifdef DEBUG_INDEP + if (out2 && ftotal) { + dprintk("%u %d a: ", ftotal, left); + for (rep = outpos; rep < endpos; rep++) { + dprintk(" %d", out[rep]); + } + dprintk("\n"); + dprintk("%u %d b: ", ftotal, left); + for (rep = outpos; rep < endpos; rep++) { + dprintk(" %d", out2[rep]); + } + dprintk("\n"); + } +#endif for (rep = outpos; rep < endpos; rep++) { if (out[rep] != CRUSH_ITEM_UNDEF) continue; @@ -726,6 +751,24 @@ static void crush_choose_indep(const struct crush_map *map, out2[rep] = CRUSH_ITEM_NONE; } } +#ifndef __KERNEL__ + if (map->choose_tries && ftotal <= map->choose_total_tries) + map->choose_tries[ftotal]++; +#endif +#ifdef DEBUG_INDEP + if (out2) { + dprintk("%u %d a: ", ftotal, left); + for (rep = outpos; rep < endpos; rep++) { + dprintk(" %d", out[rep]); + } + dprintk("\n"); + dprintk("%u %d b: ", ftotal, left); + for (rep = outpos; rep < endpos; rep++) { + dprintk(" %d", out2[rep]); + } + dprintk("\n"); + } +#endif } /** @@ -790,8 +833,15 @@ int crush_do_rule(const struct crush_map *map, switch (curstep->op) { case CRUSH_RULE_TAKE: - w[0] = curstep->arg1; - wsize = 1; + if ((curstep->arg1 >= 0 && + curstep->arg1 < map->max_devices) || + (-1-curstep->arg1 < map->max_buckets && + map->buckets[-1-curstep->arg1])) { + w[0] = curstep->arg1; + wsize = 1; + } else { + dprintk(" bad take value %d\n", curstep->arg1); + } break; case CRUSH_RULE_SET_CHOOSE_TRIES: @@ -877,7 +927,7 @@ int crush_do_rule(const struct crush_map *map, 0); } else { out_size = ((numrep < (result_max-osize)) ? - numrep : (result_max-osize)); + numrep : (result_max-osize)); crush_choose_indep( map, map->buckets[-1-w[i]], @@ -923,5 +973,3 @@ int crush_do_rule(const struct crush_map *map, } return result_len; } - - diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 073262fea6dd..1679f47280e2 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -278,7 +278,6 @@ static void _ceph_msgr_exit(void) ceph_msgr_slab_exit(); BUG_ON(zero_page == NULL); - kunmap(zero_page); page_cache_release(zero_page); zero_page = NULL; } @@ -1545,7 +1544,7 @@ static int write_partial_message_data(struct ceph_connection *con) page = ceph_msg_data_next(&msg->cursor, &page_offset, &length, &last_piece); ret = ceph_tcp_sendpage(con->sock, page, page_offset, - length, last_piece); + length, !last_piece); if (ret <= 0) { if (do_datacrc) msg->footer.data_crc = cpu_to_le32(crc); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 2b3cf05e87b0..9d6ff1215928 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -298,21 +298,28 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc) } EXPORT_SYMBOL(ceph_monc_request_next_osdmap); +/* + * Wait for an osdmap with a given epoch. + * + * @epoch: epoch to wait for + * @timeout: in jiffies, 0 means "wait forever" + */ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, unsigned long timeout) { unsigned long started = jiffies; - int ret; + long ret; mutex_lock(&monc->mutex); while (monc->have_osdmap < epoch) { mutex_unlock(&monc->mutex); - if (timeout != 0 && time_after_eq(jiffies, started + timeout)) + if (timeout && time_after_eq(jiffies, started + timeout)) return -ETIMEDOUT; ret = wait_event_interruptible_timeout(monc->client->auth_wq, - monc->have_osdmap >= epoch, timeout); + monc->have_osdmap >= epoch, + ceph_timeout_jiffies(timeout)); if (ret < 0) return ret; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index c4ec9239249a..50033677c0fa 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -296,6 +296,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, case CEPH_OSD_OP_CMPXATTR: ceph_osd_data_release(&op->xattr.osd_data); break; + case CEPH_OSD_OP_STAT: + ceph_osd_data_release(&op->raw_data_in); + break; default: break; } @@ -450,7 +453,7 @@ __CEPH_FORALL_OSD_OPS(GENERATE_CASE) */ static struct ceph_osd_req_op * _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, - u16 opcode) + u16 opcode, u32 flags) { struct ceph_osd_req_op *op; @@ -460,14 +463,15 @@ _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, op = &osd_req->r_ops[which]; memset(op, 0, sizeof (*op)); op->op = opcode; + op->flags = flags; return op; } void osd_req_op_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode) + unsigned int which, u16 opcode, u32 flags) { - (void)_osd_req_op_init(osd_req, which, opcode); + (void)_osd_req_op_init(osd_req, which, opcode, flags); } EXPORT_SYMBOL(osd_req_op_init); @@ -476,7 +480,8 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req, u64 offset, u64 length, u64 truncate_size, u32 truncate_seq) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, + opcode, 0); size_t payload_len = 0; BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && @@ -515,7 +520,8 @@ EXPORT_SYMBOL(osd_req_op_extent_update); void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *class, const char *method) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, + opcode, 0); struct ceph_pagelist *pagelist; size_t payload_len = 0; size_t size; @@ -552,7 +558,8 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *name, const void *value, size_t size, u8 cmp_op, u8 cmp_mode) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, + opcode, 0); struct ceph_pagelist *pagelist; size_t payload_len; @@ -585,7 +592,8 @@ void osd_req_op_watch_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u64 cookie, u64 version, int flag) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, + opcode, 0); BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH); @@ -602,7 +610,8 @@ void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, u64 expected_write_size) { struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, - CEPH_OSD_OP_SETALLOCHINT); + CEPH_OSD_OP_SETALLOCHINT, + 0); op->alloc_hint.expected_object_size = expected_object_size; op->alloc_hint.expected_write_size = expected_write_size; @@ -786,7 +795,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, } if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) { - osd_req_op_init(req, which, opcode); + osd_req_op_init(req, which, opcode, 0); } else { u32 object_size = le32_to_cpu(layout->fl_object_size); u32 object_base = off - objoff; @@ -1088,7 +1097,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc, BUG_ON(!list_empty(&osd->o_osd_lru)); list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); - osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; + osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl; } static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc, @@ -1199,7 +1208,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o) static void __schedule_osd_timeout(struct ceph_osd_client *osdc) { schedule_delayed_work(&osdc->timeout_work, - osdc->client->options->osd_keepalive_timeout * HZ); + osdc->client->options->osd_keepalive_timeout); } static void __cancel_osd_timeout(struct ceph_osd_client *osdc) @@ -1567,10 +1576,9 @@ static void handle_timeout(struct work_struct *work) { struct ceph_osd_client *osdc = container_of(work, struct ceph_osd_client, timeout_work.work); + struct ceph_options *opts = osdc->client->options; struct ceph_osd_request *req; struct ceph_osd *osd; - unsigned long keepalive = - osdc->client->options->osd_keepalive_timeout * HZ; struct list_head slow_osds; dout("timeout\n"); down_read(&osdc->map_sem); @@ -1586,7 +1594,8 @@ static void handle_timeout(struct work_struct *work) */ INIT_LIST_HEAD(&slow_osds); list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { - if (time_before(jiffies, req->r_stamp + keepalive)) + if (time_before(jiffies, + req->r_stamp + opts->osd_keepalive_timeout)) break; osd = req->r_osd; @@ -1613,8 +1622,7 @@ static void handle_osds_timeout(struct work_struct *work) struct ceph_osd_client *osdc = container_of(work, struct ceph_osd_client, osds_timeout_work.work); - unsigned long delay = - osdc->client->options->osd_idle_ttl * HZ >> 2; + unsigned long delay = osdc->client->options->osd_idle_ttl / 4; dout("osds timeout\n"); down_read(&osdc->map_sem); @@ -2619,7 +2627,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) osdc->event_count = 0; schedule_delayed_work(&osdc->osds_timeout_work, - round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); + round_jiffies_relative(osdc->client->options->osd_idle_ttl)); err = -ENOMEM; osdc->req_mempool = mempool_create_kmalloc_pool(10, diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 15796696d64e..4a3125836b64 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -89,7 +89,7 @@ static int crush_decode_tree_bucket(void **p, void *end, { int j; dout("crush_decode_tree_bucket %p to %p\n", *p, end); - ceph_decode_32_safe(p, end, b->num_nodes, bad); + ceph_decode_8_safe(p, end, b->num_nodes, bad); b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS); if (b->node_weights == NULL) return -ENOMEM; diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 096d91447e06..d4f5f220a8e5 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -51,10 +51,7 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty) set_page_dirty_lock(pages[i]); put_page(pages[i]); } - if (is_vmalloc_addr(pages)) - vfree(pages); - else - kfree(pages); + kvfree(pages); } EXPORT_SYMBOL(ceph_put_page_vector); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 65de0684e22a..61eafc9b4545 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -197,11 +197,4 @@ static int __init ipv4_netfilter_init(void) { return nf_register_afinfo(&nf_ip_afinfo); } - -static void __exit ipv4_netfilter_fini(void) -{ - nf_unregister_afinfo(&nf_ip_afinfo); -} - -module_init(ipv4_netfilter_init); -module_exit(ipv4_netfilter_fini); +subsys_initcall(ipv4_netfilter_init); diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 936ad0a15371..b512fbd9d79a 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -14,6 +14,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o -sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o +sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 9dd0ea8db463..9825ff0f91d6 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -37,16 +37,18 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ static inline int xprt_need_to_requeue(struct rpc_xprt *xprt) { - return xprt->bc_alloc_count > 0; + return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots); } static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n) { + atomic_add(n, &xprt->bc_free_slots); xprt->bc_alloc_count += n; } static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n) { + atomic_sub(n, &xprt->bc_free_slots); return xprt->bc_alloc_count -= n; } @@ -60,13 +62,62 @@ static void xprt_free_allocation(struct rpc_rqst *req) dprintk("RPC: free allocations for req= %p\n", req); WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); - xbufp = &req->rq_private_buf; + xbufp = &req->rq_rcv_buf; free_page((unsigned long)xbufp->head[0].iov_base); xbufp = &req->rq_snd_buf; free_page((unsigned long)xbufp->head[0].iov_base); kfree(req); } +static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags) +{ + struct page *page; + /* Preallocate one XDR receive buffer */ + page = alloc_page(gfp_flags); + if (page == NULL) + return -ENOMEM; + buf->head[0].iov_base = page_address(page); + buf->head[0].iov_len = PAGE_SIZE; + buf->tail[0].iov_base = NULL; + buf->tail[0].iov_len = 0; + buf->page_len = 0; + buf->len = 0; + buf->buflen = PAGE_SIZE; + return 0; +} + +static +struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags) +{ + struct rpc_rqst *req; + + /* Pre-allocate one backchannel rpc_rqst */ + req = kzalloc(sizeof(*req), gfp_flags); + if (req == NULL) + return NULL; + + req->rq_xprt = xprt; + INIT_LIST_HEAD(&req->rq_list); + INIT_LIST_HEAD(&req->rq_bc_list); + + /* Preallocate one XDR receive buffer */ + if (xprt_alloc_xdr_buf(&req->rq_rcv_buf, gfp_flags) < 0) { + printk(KERN_ERR "Failed to create bc receive xbuf\n"); + goto out_free; + } + req->rq_rcv_buf.len = PAGE_SIZE; + + /* Preallocate one XDR send buffer */ + if (xprt_alloc_xdr_buf(&req->rq_snd_buf, gfp_flags) < 0) { + printk(KERN_ERR "Failed to create bc snd xbuf\n"); + goto out_free; + } + return req; +out_free: + xprt_free_allocation(req); + return NULL; +} + /* * Preallocate up to min_reqs structures and related buffers for use * by the backchannel. This function can be called multiple times @@ -87,9 +138,7 @@ static void xprt_free_allocation(struct rpc_rqst *req) */ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) { - struct page *page_rcv = NULL, *page_snd = NULL; - struct xdr_buf *xbufp = NULL; - struct rpc_rqst *req, *tmp; + struct rpc_rqst *req; struct list_head tmp_list; int i; @@ -106,7 +155,7 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) INIT_LIST_HEAD(&tmp_list); for (i = 0; i < min_reqs; i++) { /* Pre-allocate one backchannel rpc_rqst */ - req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); + req = xprt_alloc_bc_req(xprt, GFP_KERNEL); if (req == NULL) { printk(KERN_ERR "Failed to create bc rpc_rqst\n"); goto out_free; @@ -115,41 +164,6 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) /* Add the allocated buffer to the tmp list */ dprintk("RPC: adding req= %p\n", req); list_add(&req->rq_bc_pa_list, &tmp_list); - - req->rq_xprt = xprt; - INIT_LIST_HEAD(&req->rq_list); - INIT_LIST_HEAD(&req->rq_bc_list); - - /* Preallocate one XDR receive buffer */ - page_rcv = alloc_page(GFP_KERNEL); - if (page_rcv == NULL) { - printk(KERN_ERR "Failed to create bc receive xbuf\n"); - goto out_free; - } - xbufp = &req->rq_rcv_buf; - xbufp->head[0].iov_base = page_address(page_rcv); - xbufp->head[0].iov_len = PAGE_SIZE; - xbufp->tail[0].iov_base = NULL; - xbufp->tail[0].iov_len = 0; - xbufp->page_len = 0; - xbufp->len = PAGE_SIZE; - xbufp->buflen = PAGE_SIZE; - - /* Preallocate one XDR send buffer */ - page_snd = alloc_page(GFP_KERNEL); - if (page_snd == NULL) { - printk(KERN_ERR "Failed to create bc snd xbuf\n"); - goto out_free; - } - - xbufp = &req->rq_snd_buf; - xbufp->head[0].iov_base = page_address(page_snd); - xbufp->head[0].iov_len = 0; - xbufp->tail[0].iov_base = NULL; - xbufp->tail[0].iov_len = 0; - xbufp->page_len = 0; - xbufp->len = 0; - xbufp->buflen = PAGE_SIZE; } /* @@ -167,7 +181,10 @@ out_free: /* * Memory allocation failed, free the temporary list */ - list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) { + while (!list_empty(&tmp_list)) { + req = list_first_entry(&tmp_list, + struct rpc_rqst, + rq_bc_pa_list); list_del(&req->rq_bc_pa_list); xprt_free_allocation(req); } @@ -217,9 +234,15 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) struct rpc_rqst *req = NULL; dprintk("RPC: allocate a backchannel request\n"); - if (list_empty(&xprt->bc_pa_list)) + if (atomic_read(&xprt->bc_free_slots) <= 0) goto not_found; - + if (list_empty(&xprt->bc_pa_list)) { + req = xprt_alloc_bc_req(xprt, GFP_ATOMIC); + if (!req) + goto not_found; + /* Note: this 'free' request adds it to xprt->bc_pa_list */ + xprt_free_bc_request(req); + } req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, rq_bc_pa_list); req->rq_reply_bytes_recvd = 0; @@ -245,11 +268,21 @@ void xprt_free_bc_request(struct rpc_rqst *req) req->rq_connect_cookie = xprt->connect_cookie - 1; smp_mb__before_atomic(); - WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); smp_mb__after_atomic(); - if (!xprt_need_to_requeue(xprt)) { + /* + * Return it to the list of preallocations so that it + * may be reused by a new callback request. + */ + spin_lock_bh(&xprt->bc_pa_lock); + if (xprt_need_to_requeue(xprt)) { + list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); + xprt->bc_alloc_count++; + req = NULL; + } + spin_unlock_bh(&xprt->bc_pa_lock); + if (req != NULL) { /* * The last remaining session was destroyed while this * entry was in use. Free the entry and don't attempt @@ -260,14 +293,6 @@ void xprt_free_bc_request(struct rpc_rqst *req) xprt_free_allocation(req); return; } - - /* - * Return it to the list of preallocations so that it - * may be reused by a new callback request. - */ - spin_lock_bh(&xprt->bc_pa_lock); - list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); - spin_unlock_bh(&xprt->bc_pa_lock); } /* @@ -311,6 +336,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) spin_lock(&xprt->bc_pa_lock); list_del(&req->rq_bc_pa_list); + xprt->bc_alloc_count--; spin_unlock(&xprt->bc_pa_lock); req->rq_private_buf.len = copied; diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c deleted file mode 100644 index 15c7a8a1c24f..000000000000 --- a/net/sunrpc/bc_svc.c +++ /dev/null @@ -1,63 +0,0 @@ -/****************************************************************************** - -(c) 2007 Network Appliance, Inc. All Rights Reserved. -(c) 2009 NetApp. All Rights Reserved. - -NetApp provides this source code under the GPL v2 License. -The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -******************************************************************************/ - -/* - * The NFSv4.1 callback service helper routines. - * They implement the transport level processing required to send the - * reply over an existing open connection previously established by the client. - */ - -#include <linux/module.h> - -#include <linux/sunrpc/xprt.h> -#include <linux/sunrpc/sched.h> -#include <linux/sunrpc/bc_xprt.h> - -#define RPCDBG_FACILITY RPCDBG_SVCDSP - -/* Empty callback ops */ -static const struct rpc_call_ops nfs41_callback_ops = { -}; - - -/* - * Send the callback reply - */ -int bc_send(struct rpc_rqst *req) -{ - struct rpc_task *task; - int ret; - - dprintk("RPC: bc_send req= %p\n", req); - task = rpc_run_bc_task(req, &nfs41_callback_ops); - if (IS_ERR(task)) - ret = PTR_ERR(task); - else { - WARN_ON_ONCE(atomic_read(&task->tk_count) != 1); - ret = task->tk_status; - rpc_put_task(task); - } - dprintk("RPC: bc_send ret= %d\n", ret); - return ret; -} - diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e6ce1517367f..cbc6af923dd1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -891,15 +891,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_flags |= RPC_TASK_SOFT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; - if (sk_memalloc_socks()) { - struct rpc_xprt *xprt; - - rcu_read_lock(); - xprt = rcu_dereference(clnt->cl_xprt); - if (xprt->swapper) - task->tk_flags |= RPC_TASK_SWAPPER; - rcu_read_unlock(); - } + if (atomic_read(&clnt->cl_swapper)) + task->tk_flags |= RPC_TASK_SWAPPER; /* Add to the client's list of all tasks */ spin_lock(&clnt->cl_lock); list_add_tail(&task->tk_task, &clnt->cl_tasks); @@ -1031,15 +1024,14 @@ EXPORT_SYMBOL_GPL(rpc_call_async); * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run * rpc_execute against it * @req: RPC request - * @tk_ops: RPC call ops */ -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, - const struct rpc_call_ops *tk_ops) +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) { struct rpc_task *task; struct xdr_buf *xbufp = &req->rq_snd_buf; struct rpc_task_setup task_setup_data = { - .callback_ops = tk_ops, + .callback_ops = &rpc_default_ops, + .flags = RPC_TASK_SOFTCONN, }; dprintk("RPC: rpc_run_bc_task req= %p\n", req); @@ -1614,6 +1606,7 @@ call_allocate(struct rpc_task *task) req->rq_callsize + req->rq_rcvsize); if (req->rq_buffer != NULL) return; + xprt_inject_disconnect(xprt); dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); @@ -1951,33 +1944,36 @@ call_bc_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - if (!xprt_prepare_transmit(task)) { - /* - * Could not reserve the transport. Try again after the - * transport is released. - */ - task->tk_status = 0; - task->tk_action = call_bc_transmit; - return; - } + if (!xprt_prepare_transmit(task)) + goto out_retry; - task->tk_action = rpc_exit_task; if (task->tk_status < 0) { printk(KERN_NOTICE "RPC: Could not send backchannel reply " "error: %d\n", task->tk_status); - return; + goto out_done; } + if (req->rq_connect_cookie != req->rq_xprt->connect_cookie) + req->rq_bytes_sent = 0; xprt_transmit(task); + + if (task->tk_status == -EAGAIN) + goto out_nospace; + xprt_end_transmit(task); dprint_status(task); switch (task->tk_status) { case 0: /* Success */ - break; case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: + case -ECONNRESET: + case -ECONNREFUSED: + case -EADDRINUSE: + case -ENOTCONN: + case -EPIPE: + break; case -ETIMEDOUT: /* * Problem reaching the server. Disconnect and let the @@ -2002,6 +1998,13 @@ call_bc_transmit(struct rpc_task *task) break; } rpc_wake_up_queued_task(&req->rq_xprt->pending, task); +out_done: + task->tk_action = rpc_exit_task; + return; +out_nospace: + req->rq_connect_cookie = req->rq_xprt->connect_cookie; +out_retry: + task->tk_status = 0; } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ @@ -2476,3 +2479,59 @@ void rpc_show_tasks(struct net *net) spin_unlock(&sn->rpc_client_lock); } #endif + +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +int +rpc_clnt_swap_activate(struct rpc_clnt *clnt) +{ + int ret = 0; + struct rpc_xprt *xprt; + + if (atomic_inc_return(&clnt->cl_swapper) == 1) { +retry: + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + if (!xprt) { + /* + * If we didn't get a reference, then we likely are + * racing with a migration event. Wait for a grace + * period and try again. + */ + synchronize_rcu(); + goto retry; + } + + ret = xprt_enable_swap(xprt); + xprt_put(xprt); + } + return ret; +} +EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate); + +void +rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) +{ + struct rpc_xprt *xprt; + + if (atomic_dec_if_positive(&clnt->cl_swapper) == 0) { +retry: + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + if (!xprt) { + /* + * If we didn't get a reference, then we likely are + * racing with a migration event. Wait for a grace + * period and try again. + */ + synchronize_rcu(); + goto retry; + } + + xprt_disable_swap(xprt); + xprt_put(xprt); + } +} +EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate); +#endif /* CONFIG_SUNRPC_SWAP */ diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index 82962f7e6e88..e7b4d93566df 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -10,9 +10,12 @@ #include "netns.h" static struct dentry *topdir; +static struct dentry *rpc_fault_dir; static struct dentry *rpc_clnt_dir; static struct dentry *rpc_xprt_dir; +unsigned int rpc_inject_disconnect; + struct rpc_clnt_iter { struct rpc_clnt *clnt; loff_t pos; @@ -257,6 +260,8 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt) debugfs_remove_recursive(xprt->debugfs); xprt->debugfs = NULL; } + + atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); } void @@ -266,11 +271,79 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt) xprt->debugfs = NULL; } +static int +fault_open(struct inode *inode, struct file *filp) +{ + filp->private_data = kmalloc(128, GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + return 0; +} + +static int +fault_release(struct inode *inode, struct file *filp) +{ + kfree(filp->private_data); + return 0; +} + +static ssize_t +fault_disconnect_read(struct file *filp, char __user *user_buf, + size_t len, loff_t *offset) +{ + char *buffer = (char *)filp->private_data; + size_t size; + + size = sprintf(buffer, "%u\n", rpc_inject_disconnect); + return simple_read_from_buffer(user_buf, len, offset, buffer, size); +} + +static ssize_t +fault_disconnect_write(struct file *filp, const char __user *user_buf, + size_t len, loff_t *offset) +{ + char buffer[16]; + + if (len >= sizeof(buffer)) + len = sizeof(buffer) - 1; + if (copy_from_user(buffer, user_buf, len)) + return -EFAULT; + buffer[len] = '\0'; + if (kstrtouint(buffer, 10, &rpc_inject_disconnect)) + return -EINVAL; + return len; +} + +static const struct file_operations fault_disconnect_fops = { + .owner = THIS_MODULE, + .open = fault_open, + .read = fault_disconnect_read, + .write = fault_disconnect_write, + .release = fault_release, +}; + +static struct dentry * +inject_fault_dir(struct dentry *topdir) +{ + struct dentry *faultdir; + + faultdir = debugfs_create_dir("inject_fault", topdir); + if (!faultdir) + return NULL; + + if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir, + NULL, &fault_disconnect_fops)) + return NULL; + + return faultdir; +} + void __exit sunrpc_debugfs_exit(void) { debugfs_remove_recursive(topdir); topdir = NULL; + rpc_fault_dir = NULL; rpc_clnt_dir = NULL; rpc_xprt_dir = NULL; } @@ -282,6 +355,10 @@ sunrpc_debugfs_init(void) if (!topdir) return; + rpc_fault_dir = inject_fault_dir(topdir); + if (!rpc_fault_dir) + goto out_remove; + rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir); if (!rpc_clnt_dir) goto out_remove; @@ -294,5 +371,6 @@ sunrpc_debugfs_init(void) out_remove: debugfs_remove_recursive(topdir); topdir = NULL; + rpc_fault_dir = NULL; rpc_clnt_dir = NULL; } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 852ae606b02a..5a16d8d8c831 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1350,6 +1350,11 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; + struct rpc_task *task; + int proc_error; + int error; + + dprintk("svc: %s(%p)\n", __func__, req); /* Build the svc_rqst used by the common processing routine */ rqstp->rq_xprt = serv->sv_bc_xprt; @@ -1372,21 +1377,36 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, /* * Skip the next two words because they've already been - * processed in the trasport + * processed in the transport */ svc_getu32(argv); /* XID */ svc_getnl(argv); /* CALLDIR */ - /* Returns 1 for send, 0 for drop */ - if (svc_process_common(rqstp, argv, resv)) { - memcpy(&req->rq_snd_buf, &rqstp->rq_res, - sizeof(req->rq_snd_buf)); - return bc_send(req); - } else { - /* drop request */ + /* Parse and execute the bc call */ + proc_error = svc_process_common(rqstp, argv, resv); + + atomic_inc(&req->rq_xprt->bc_free_slots); + if (!proc_error) { + /* Processing error: drop the request */ xprt_free_bc_request(req); return 0; } + + /* Finally, send the reply synchronously */ + memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); + task = rpc_run_bc_task(req); + if (IS_ERR(task)) { + error = PTR_ERR(task); + goto out; + } + + WARN_ON_ONCE(atomic_read(&task->tk_count) != 1); + error = task->tk_status; + rpc_put_task(task); + +out: + dprintk("svc: %s(), error=%d\n", __func__, error); + return error; } EXPORT_SYMBOL_GPL(bc_svc_process); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1d4fe24af06a..ab5dd621ae0c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -68,6 +68,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net); static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); +static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *); static void xprt_destroy(struct rpc_xprt *xprt); static DEFINE_SPINLOCK(xprt_list_lock); @@ -250,6 +251,8 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) } xprt_clear_locked(xprt); out_sleep: + if (req) + __xprt_put_cong(xprt, req); dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; @@ -608,8 +611,8 @@ static void xprt_autoclose(struct work_struct *work) struct rpc_xprt *xprt = container_of(work, struct rpc_xprt, task_cleanup); - xprt->ops->close(xprt); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + xprt->ops->close(xprt); xprt_release_write(xprt, NULL); } @@ -967,6 +970,7 @@ void xprt_transmit(struct rpc_task *task) task->tk_status = status; return; } + xprt_inject_disconnect(xprt); dprintk("RPC: %5u xmit complete\n", task->tk_pid); task->tk_flags |= RPC_TASK_SENT; @@ -1285,6 +1289,7 @@ void xprt_release(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); if (req->rq_buffer) xprt->ops->buf_free(req->rq_buffer); + xprt_inject_disconnect(xprt); if (req->rq_cred != NULL) put_rpccred(req->rq_cred); task->tk_rqstp = NULL; diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 302d4ebf6fbf..f1e8dafbd507 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -11,6 +11,21 @@ * can take tens of usecs to complete. */ +/* Normal operation + * + * A Memory Region is prepared for RDMA READ or WRITE using the + * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is + * finished, the Memory Region is unmapped using the ib_unmap_fmr + * verb (fmr_op_unmap). + */ + +/* Transport recovery + * + * After a transport reconnect, fmr_op_map re-uses the MR already + * allocated for the RPC, but generates a fresh rkey then maps the + * MR again. This process is synchronous. + */ + #include "xprt_rdma.h" #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -50,19 +65,28 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt) struct rpcrdma_mw *r; int i, rc; + spin_lock_init(&buf->rb_mwlock); INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_all); - i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; - dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); + i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1); + i += 2; /* head + tail */ + i *= buf->rb_max_requests; /* one set for each RPC slot */ + dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i); + rc = -ENOMEM; while (i--) { r = kzalloc(sizeof(*r), GFP_KERNEL); if (!r) - return -ENOMEM; + goto out; - r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); - if (IS_ERR(r->r.fmr)) + r->r.fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES * + sizeof(u64), GFP_KERNEL); + if (!r->r.fmr.physaddrs) + goto out_free; + + r->r.fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); + if (IS_ERR(r->r.fmr.fmr)) goto out_fmr_err; list_add(&r->mw_list, &buf->rb_mws); @@ -71,12 +95,24 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt) return 0; out_fmr_err: - rc = PTR_ERR(r->r.fmr); + rc = PTR_ERR(r->r.fmr.fmr); dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc); + kfree(r->r.fmr.physaddrs); +out_free: kfree(r); +out: return rc; } +static int +__fmr_unmap(struct rpcrdma_mw *r) +{ + LIST_HEAD(l); + + list_add(&r->r.fmr.fmr->list, &l); + return ib_unmap_fmr(&l); +} + /* Use the ib_map_phys_fmr() verb to register a memory region * for remote access via RDMA READ or RDMA WRITE. */ @@ -85,12 +121,24 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, int nsegs, bool writing) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct ib_device *device = ia->ri_id->device; + struct ib_device *device = ia->ri_device; enum dma_data_direction direction = rpcrdma_data_dir(writing); struct rpcrdma_mr_seg *seg1 = seg; - struct rpcrdma_mw *mw = seg1->rl_mw; - u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; int len, pageoff, i, rc; + struct rpcrdma_mw *mw; + + mw = seg1->rl_mw; + seg1->rl_mw = NULL; + if (!mw) { + mw = rpcrdma_get_mw(r_xprt); + if (!mw) + return -ENOMEM; + } else { + /* this is a retransmit; generate a fresh rkey */ + rc = __fmr_unmap(mw); + if (rc) + return rc; + } pageoff = offset_in_page(seg1->mr_offset); seg1->mr_offset -= pageoff; /* start of page */ @@ -100,7 +148,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, nsegs = RPCRDMA_MAX_FMR_SGES; for (i = 0; i < nsegs;) { rpcrdma_map_one(device, seg, direction); - physaddrs[i] = seg->mr_dma; + mw->r.fmr.physaddrs[i] = seg->mr_dma; len += seg->mr_len; ++seg; ++i; @@ -110,11 +158,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, break; } - rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma); + rc = ib_map_phys_fmr(mw->r.fmr.fmr, mw->r.fmr.physaddrs, + i, seg1->mr_dma); if (rc) goto out_maperr; - seg1->mr_rkey = mw->r.fmr->rkey; + seg1->rl_mw = mw; + seg1->mr_rkey = mw->r.fmr.fmr->rkey; seg1->mr_base = seg1->mr_dma + pageoff; seg1->mr_nsegs = i; seg1->mr_len = len; @@ -137,48 +187,28 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mr_seg *seg1 = seg; - struct ib_device *device; + struct rpcrdma_mw *mw = seg1->rl_mw; int rc, nsegs = seg->mr_nsegs; - LIST_HEAD(l); - list_add(&seg1->rl_mw->r.fmr->list, &l); - rc = ib_unmap_fmr(&l); - read_lock(&ia->ri_qplock); - device = ia->ri_id->device; + dprintk("RPC: %s: FMR %p\n", __func__, mw); + + seg1->rl_mw = NULL; while (seg1->mr_nsegs--) - rpcrdma_unmap_one(device, seg++); - read_unlock(&ia->ri_qplock); + rpcrdma_unmap_one(ia->ri_device, seg++); + rc = __fmr_unmap(mw); if (rc) goto out_err; + rpcrdma_put_mw(r_xprt, mw); return nsegs; out_err: + /* The FMR is abandoned, but remains in rb_all. fmr_op_destroy + * will attempt to release it when the transport is destroyed. + */ dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc); return nsegs; } -/* After a disconnect, unmap all FMRs. - * - * This is invoked only in the transport connect worker in order - * to serialize with rpcrdma_register_fmr_external(). - */ -static void -fmr_op_reset(struct rpcrdma_xprt *r_xprt) -{ - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct rpcrdma_mw *r; - LIST_HEAD(list); - int rc; - - list_for_each_entry(r, &buf->rb_all, mw_all) - list_add(&r->r.fmr->list, &list); - - rc = ib_unmap_fmr(&list); - if (rc) - dprintk("RPC: %s: ib_unmap_fmr failed %i\n", - __func__, rc); -} - static void fmr_op_destroy(struct rpcrdma_buffer *buf) { @@ -188,10 +218,13 @@ fmr_op_destroy(struct rpcrdma_buffer *buf) while (!list_empty(&buf->rb_all)) { r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); list_del(&r->mw_all); - rc = ib_dealloc_fmr(r->r.fmr); + kfree(r->r.fmr.physaddrs); + + rc = ib_dealloc_fmr(r->r.fmr.fmr); if (rc) dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", __func__, rc); + kfree(r); } } @@ -202,7 +235,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { .ro_open = fmr_op_open, .ro_maxpages = fmr_op_maxpages, .ro_init = fmr_op_init, - .ro_reset = fmr_op_reset, .ro_destroy = fmr_op_destroy, .ro_displayname = "fmr", }; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index d234521320a4..04ea914201b2 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -11,12 +11,136 @@ * but most complex memory registration mode. */ +/* Normal operation + * + * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG + * Work Request (frmr_op_map). When the RDMA operation is finished, this + * Memory Region is invalidated using a LOCAL_INV Work Request + * (frmr_op_unmap). + * + * Typically these Work Requests are not signaled, and neither are RDMA + * SEND Work Requests (with the exception of signaling occasionally to + * prevent provider work queue overflows). This greatly reduces HCA + * interrupt workload. + * + * As an optimization, frwr_op_unmap marks MRs INVALID before the + * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on + * rb_mws immediately so that no work (like managing a linked list + * under a spinlock) is needed in the completion upcall. + * + * But this means that frwr_op_map() can occasionally encounter an MR + * that is INVALID but the LOCAL_INV WR has not completed. Work Queue + * ordering prevents a subsequent FAST_REG WR from executing against + * that MR while it is still being invalidated. + */ + +/* Transport recovery + * + * ->op_map and the transport connect worker cannot run at the same + * time, but ->op_unmap can fire while the transport connect worker + * is running. Thus MR recovery is handled in ->op_map, to guarantee + * that recovered MRs are owned by a sending RPC, and not one where + * ->op_unmap could fire at the same time transport reconnect is + * being done. + * + * When the underlying transport disconnects, MRs are left in one of + * three states: + * + * INVALID: The MR was not in use before the QP entered ERROR state. + * (Or, the LOCAL_INV WR has not completed or flushed yet). + * + * STALE: The MR was being registered or unregistered when the QP + * entered ERROR state, and the pending WR was flushed. + * + * VALID: The MR was registered before the QP entered ERROR state. + * + * When frwr_op_map encounters STALE and VALID MRs, they are recovered + * with ib_dereg_mr and then are re-initialized. Beause MR recovery + * allocates fresh resources, it is deferred to a workqueue, and the + * recovered MRs are placed back on the rb_mws list when recovery is + * complete. frwr_op_map allocates another MR for the current RPC while + * the broken MR is reset. + * + * To ensure that frwr_op_map doesn't encounter an MR that is marked + * INVALID but that is about to be flushed due to a previous transport + * disconnect, the transport connect worker attempts to drain all + * pending send queue WRs before the transport is reconnected. + */ + #include "xprt_rdma.h" #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_TRANS #endif +static struct workqueue_struct *frwr_recovery_wq; + +#define FRWR_RECOVERY_WQ_FLAGS (WQ_UNBOUND | WQ_MEM_RECLAIM) + +int +frwr_alloc_recovery_wq(void) +{ + frwr_recovery_wq = alloc_workqueue("frwr_recovery", + FRWR_RECOVERY_WQ_FLAGS, 0); + return !frwr_recovery_wq ? -ENOMEM : 0; +} + +void +frwr_destroy_recovery_wq(void) +{ + struct workqueue_struct *wq; + + if (!frwr_recovery_wq) + return; + + wq = frwr_recovery_wq; + frwr_recovery_wq = NULL; + destroy_workqueue(wq); +} + +/* Deferred reset of a single FRMR. Generate a fresh rkey by + * replacing the MR. + * + * There's no recovery if this fails. The FRMR is abandoned, but + * remains in rb_all. It will be cleaned up when the transport is + * destroyed. + */ +static void +__frwr_recovery_worker(struct work_struct *work) +{ + struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw, + r.frmr.fr_work); + struct rpcrdma_xprt *r_xprt = r->r.frmr.fr_xprt; + unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; + struct ib_pd *pd = r_xprt->rx_ia.ri_pd; + + if (ib_dereg_mr(r->r.frmr.fr_mr)) + goto out_fail; + + r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(pd, depth); + if (IS_ERR(r->r.frmr.fr_mr)) + goto out_fail; + + dprintk("RPC: %s: recovered FRMR %p\n", __func__, r); + r->r.frmr.fr_state = FRMR_IS_INVALID; + rpcrdma_put_mw(r_xprt, r); + return; + +out_fail: + pr_warn("RPC: %s: FRMR %p unrecovered\n", + __func__, r); +} + +/* A broken MR was discovered in a context that can't sleep. + * Defer recovery to the recovery worker. + */ +static void +__frwr_queue_recovery(struct rpcrdma_mw *r) +{ + INIT_WORK(&r->r.frmr.fr_work, __frwr_recovery_worker); + queue_work(frwr_recovery_wq, &r->r.frmr.fr_work); +} + static int __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, unsigned int depth) @@ -128,7 +252,7 @@ frwr_sendcompletion(struct ib_wc *wc) /* WARNING: Only wr_id and status are reliable at this point */ r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; - dprintk("RPC: %s: frmr %p (stale), status %s (%d)\n", + pr_warn("RPC: %s: frmr %p flushed, status %s (%d)\n", __func__, r, ib_wc_status_msg(wc->status), wc->status); r->r.frmr.fr_state = FRMR_IS_STALE; } @@ -137,16 +261,19 @@ static int frwr_op_init(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct ib_device *device = r_xprt->rx_ia.ri_id->device; + struct ib_device *device = r_xprt->rx_ia.ri_device; unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; struct ib_pd *pd = r_xprt->rx_ia.ri_pd; int i; + spin_lock_init(&buf->rb_mwlock); INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_all); - i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; - dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); + i = max_t(int, RPCRDMA_MAX_DATA_SEGS / depth, 1); + i += 2; /* head + tail */ + i *= buf->rb_max_requests; /* one set for each RPC slot */ + dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i); while (i--) { struct rpcrdma_mw *r; @@ -165,6 +292,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt) list_add(&r->mw_list, &buf->rb_mws); list_add(&r->mw_all, &buf->rb_all); r->mw_sendcompletion = frwr_sendcompletion; + r->r.frmr.fr_xprt = r_xprt; } return 0; @@ -178,12 +306,12 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, int nsegs, bool writing) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct ib_device *device = ia->ri_id->device; + struct ib_device *device = ia->ri_device; enum dma_data_direction direction = rpcrdma_data_dir(writing); struct rpcrdma_mr_seg *seg1 = seg; - struct rpcrdma_mw *mw = seg1->rl_mw; - struct rpcrdma_frmr *frmr = &mw->r.frmr; - struct ib_mr *mr = frmr->fr_mr; + struct rpcrdma_mw *mw; + struct rpcrdma_frmr *frmr; + struct ib_mr *mr; struct ib_send_wr fastreg_wr, *bad_wr; u8 key; int len, pageoff; @@ -192,12 +320,25 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, u64 pa; int page_no; + mw = seg1->rl_mw; + seg1->rl_mw = NULL; + do { + if (mw) + __frwr_queue_recovery(mw); + mw = rpcrdma_get_mw(r_xprt); + if (!mw) + return -ENOMEM; + } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); + frmr = &mw->r.frmr; + frmr->fr_state = FRMR_IS_VALID; + pageoff = offset_in_page(seg1->mr_offset); seg1->mr_offset -= pageoff; /* start of page */ seg1->mr_len += pageoff; len = -pageoff; if (nsegs > ia->ri_max_frmr_depth) nsegs = ia->ri_max_frmr_depth; + for (page_no = i = 0; i < nsegs;) { rpcrdma_map_one(device, seg, direction); pa = seg->mr_dma; @@ -216,8 +357,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", __func__, mw, i, len); - frmr->fr_state = FRMR_IS_VALID; - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); fastreg_wr.wr_id = (unsigned long)(void *)mw; fastreg_wr.opcode = IB_WR_FAST_REG_MR; @@ -229,6 +368,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, fastreg_wr.wr.fast_reg.access_flags = writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; + mr = frmr->fr_mr; key = (u8)(mr->rkey & 0x000000FF); ib_update_fast_reg_key(mr, ++key); fastreg_wr.wr.fast_reg.rkey = mr->rkey; @@ -238,6 +378,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, if (rc) goto out_senderr; + seg1->rl_mw = mw; seg1->mr_rkey = mr->rkey; seg1->mr_base = seg1->mr_dma + pageoff; seg1->mr_nsegs = i; @@ -246,10 +387,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, out_senderr: dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); - ib_update_fast_reg_key(mr, --key); - frmr->fr_state = FRMR_IS_INVALID; while (i--) rpcrdma_unmap_one(device, --seg); + __frwr_queue_recovery(mw); return rc; } @@ -261,78 +401,46 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) { struct rpcrdma_mr_seg *seg1 = seg; struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_mw *mw = seg1->rl_mw; struct ib_send_wr invalidate_wr, *bad_wr; int rc, nsegs = seg->mr_nsegs; - struct ib_device *device; - seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; + dprintk("RPC: %s: FRMR %p\n", __func__, mw); + + seg1->rl_mw = NULL; + mw->r.frmr.fr_state = FRMR_IS_INVALID; memset(&invalidate_wr, 0, sizeof(invalidate_wr)); - invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; + invalidate_wr.wr_id = (unsigned long)(void *)mw; invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; + invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); - read_lock(&ia->ri_qplock); - device = ia->ri_id->device; while (seg1->mr_nsegs--) - rpcrdma_unmap_one(device, seg++); + rpcrdma_unmap_one(ia->ri_device, seg++); + read_lock(&ia->ri_qplock); rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); read_unlock(&ia->ri_qplock); if (rc) goto out_err; + + rpcrdma_put_mw(r_xprt, mw); return nsegs; out_err: - /* Force rpcrdma_buffer_get() to retry */ - seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); + __frwr_queue_recovery(mw); return nsegs; } -/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in - * an unusable state. Find FRMRs in this state and dereg / reg - * each. FRMRs that are VALID and attached to an rpcrdma_req are - * also torn down. - * - * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. - * - * This is invoked only in the transport connect worker in order - * to serialize with rpcrdma_register_frmr_external(). - */ -static void -frwr_op_reset(struct rpcrdma_xprt *r_xprt) -{ - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct ib_device *device = r_xprt->rx_ia.ri_id->device; - unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; - struct ib_pd *pd = r_xprt->rx_ia.ri_pd; - struct rpcrdma_mw *r; - int rc; - - list_for_each_entry(r, &buf->rb_all, mw_all) { - if (r->r.frmr.fr_state == FRMR_IS_INVALID) - continue; - - __frwr_release(r); - rc = __frwr_init(r, pd, device, depth); - if (rc) { - dprintk("RPC: %s: mw %p left %s\n", - __func__, r, - (r->r.frmr.fr_state == FRMR_IS_STALE ? - "stale" : "valid")); - continue; - } - - r->r.frmr.fr_state = FRMR_IS_INVALID; - } -} - static void frwr_op_destroy(struct rpcrdma_buffer *buf) { struct rpcrdma_mw *r; + /* Ensure stale MWs for "buf" are no longer in flight */ + flush_workqueue(frwr_recovery_wq); + while (!list_empty(&buf->rb_all)) { r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); list_del(&r->mw_all); @@ -347,7 +455,6 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { .ro_open = frwr_op_open, .ro_maxpages = frwr_op_maxpages, .ro_init = frwr_op_init, - .ro_reset = frwr_op_reset, .ro_destroy = frwr_op_destroy, .ro_displayname = "frwr", }; diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c index ba518af16787..41985d07fdb7 100644 --- a/net/sunrpc/xprtrdma/physical_ops.c +++ b/net/sunrpc/xprtrdma/physical_ops.c @@ -50,8 +50,7 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - rpcrdma_map_one(ia->ri_id->device, seg, - rpcrdma_data_dir(writing)); + rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing)); seg->mr_rkey = ia->ri_bind_mem->rkey; seg->mr_base = seg->mr_dma; seg->mr_nsegs = 1; @@ -65,19 +64,11 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - read_lock(&ia->ri_qplock); - rpcrdma_unmap_one(ia->ri_id->device, seg); - read_unlock(&ia->ri_qplock); - + rpcrdma_unmap_one(ia->ri_device, seg); return 1; } static void -physical_op_reset(struct rpcrdma_xprt *r_xprt) -{ -} - -static void physical_op_destroy(struct rpcrdma_buffer *buf) { } @@ -88,7 +79,6 @@ const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { .ro_open = physical_op_open, .ro_maxpages = physical_op_maxpages, .ro_init = physical_op_init, - .ro_reset = physical_op_reset, .ro_destroy = physical_op_destroy, .ro_displayname = "physical", }; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 2c53ea9e1b83..84ea37daef36 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -284,9 +284,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, return (unsigned char *)iptr - (unsigned char *)headerp; out: - if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) - return n; - for (pos = 0; nchunks--;) pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, &req->rl_segments[pos]); @@ -732,8 +729,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) struct rpcrdma_msg *headerp; struct rpcrdma_req *req; struct rpc_rqst *rqst; - struct rpc_xprt *xprt = rep->rr_xprt; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; + struct rpc_xprt *xprt = &r_xprt->rx_xprt; __be32 *iptr; int rdmalen, status; unsigned long cwnd; @@ -770,7 +767,6 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) rep->rr_len); repost: r_xprt->rx_stats.bad_reply_count++; - rep->rr_func = rpcrdma_reply_handler; if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) rpcrdma_recv_buffer_put(rep); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 436da2caec95..680f888a9ddd 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -240,6 +240,16 @@ xprt_rdma_connect_worker(struct work_struct *work) xprt_clear_connecting(xprt); } +static void +xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, + rx_xprt); + + pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); + rdma_disconnect(r_xprt->rx_ia.ri_id); +} + /* * xprt_rdma_destroy * @@ -612,12 +622,6 @@ xprt_rdma_send_request(struct rpc_task *task) if (req->rl_reply == NULL) /* e.g. reconnection */ rpcrdma_recv_buffer_get(req); - if (req->rl_reply) { - req->rl_reply->rr_func = rpcrdma_reply_handler; - /* this need only be done once, but... */ - req->rl_reply->rr_xprt = xprt; - } - /* Must suppress retransmit to maintain credits */ if (req->rl_connect_cookie == xprt->connect_cookie) goto drop_connection; @@ -676,6 +680,17 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) r_xprt->rx_stats.bad_reply_count); } +static int +xprt_rdma_enable_swap(struct rpc_xprt *xprt) +{ + return -EINVAL; +} + +static void +xprt_rdma_disable_swap(struct rpc_xprt *xprt) +{ +} + /* * Plumbing for rpc transport switch and kernel module */ @@ -694,7 +709,10 @@ static struct rpc_xprt_ops xprt_rdma_procs = { .send_request = xprt_rdma_send_request, .close = xprt_rdma_close, .destroy = xprt_rdma_destroy, - .print_stats = xprt_rdma_print_stats + .print_stats = xprt_rdma_print_stats, + .enable_swap = xprt_rdma_enable_swap, + .disable_swap = xprt_rdma_disable_swap, + .inject_disconnect = xprt_rdma_inject_disconnect }; static struct xprt_class xprt_rdma = { @@ -720,17 +738,24 @@ void xprt_rdma_cleanup(void) if (rc) dprintk("RPC: %s: xprt_unregister returned %i\n", __func__, rc); + + frwr_destroy_recovery_wq(); } int xprt_rdma_init(void) { int rc; - rc = xprt_register_transport(&xprt_rdma); - + rc = frwr_alloc_recovery_wq(); if (rc) return rc; + rc = xprt_register_transport(&xprt_rdma); + if (rc) { + frwr_destroy_recovery_wq(); + return rc; + } + dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); dprintk("Defaults:\n"); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 52df265b472a..891c4ede2c20 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -80,7 +80,6 @@ static void rpcrdma_run_tasklet(unsigned long data) { struct rpcrdma_rep *rep; - void (*func)(struct rpcrdma_rep *); unsigned long flags; data = data; @@ -89,14 +88,9 @@ rpcrdma_run_tasklet(unsigned long data) rep = list_entry(rpcrdma_tasklets_g.next, struct rpcrdma_rep, rr_list); list_del(&rep->rr_list); - func = rep->rr_func; - rep->rr_func = NULL; spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); - if (func) - func(rep); - else - rpcrdma_recv_buffer_put(rep); + rpcrdma_reply_handler(rep); spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); } @@ -236,7 +230,7 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) __func__, rep, wc->byte_len); rep->rr_len = wc->byte_len; - ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, + ib_dma_sync_single_for_cpu(rep->rr_device, rdmab_addr(rep->rr_rdmabuf), rep->rr_len, DMA_FROM_DEVICE); prefetch(rdmab_to_msg(rep->rr_rdmabuf)); @@ -407,7 +401,7 @@ connected: pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n", sap, rpc_get_port(sap), - ia->ri_id->device->name, + ia->ri_device->name, ia->ri_ops->ro_displayname, xprt->rx_buf.rb_max_requests, ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); @@ -508,8 +502,9 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) rc = PTR_ERR(ia->ri_id); goto out1; } + ia->ri_device = ia->ri_id->device; - ia->ri_pd = ib_alloc_pd(ia->ri_id->device); + ia->ri_pd = ib_alloc_pd(ia->ri_device); if (IS_ERR(ia->ri_pd)) { rc = PTR_ERR(ia->ri_pd); dprintk("RPC: %s: ib_alloc_pd() failed %i\n", @@ -517,7 +512,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) goto out2; } - rc = ib_query_device(ia->ri_id->device, devattr); + rc = ib_query_device(ia->ri_device, devattr); if (rc) { dprintk("RPC: %s: ib_query_device failed %d\n", __func__, rc); @@ -526,7 +521,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { ia->ri_have_dma_lkey = 1; - ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; + ia->ri_dma_lkey = ia->ri_device->local_dma_lkey; } if (memreg == RPCRDMA_FRMR) { @@ -541,7 +536,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) } } if (memreg == RPCRDMA_MTHCAFMR) { - if (!ia->ri_id->device->alloc_fmr) { + if (!ia->ri_device->alloc_fmr) { dprintk("RPC: %s: MTHCAFMR registration " "not supported by HCA\n", __func__); memreg = RPCRDMA_ALLPHYSICAL; @@ -590,9 +585,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) dprintk("RPC: %s: memory registration strategy is '%s'\n", __func__, ia->ri_ops->ro_displayname); - /* Else will do memory reg/dereg for each chunk */ - ia->ri_memreg_strategy = memreg; - rwlock_init(&ia->ri_qplock); return 0; @@ -622,17 +614,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) dprintk("RPC: %s: ib_dereg_mr returned %i\n", __func__, rc); } + if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { if (ia->ri_id->qp) rdma_destroy_qp(ia->ri_id); rdma_destroy_id(ia->ri_id); ia->ri_id = NULL; } - if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { - rc = ib_dealloc_pd(ia->ri_pd); - dprintk("RPC: %s: ib_dealloc_pd returned %i\n", - __func__, rc); - } + + /* If the pd is still busy, xprtrdma missed freeing a resource */ + if (ia->ri_pd && !IS_ERR(ia->ri_pd)) + WARN_ON(ib_dealloc_pd(ia->ri_pd)); } /* @@ -693,8 +685,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1; - sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall, - rpcrdma_cq_async_error_upcall, ep, &cq_attr); + sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall, + rpcrdma_cq_async_error_upcall, ep, &cq_attr); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); dprintk("RPC: %s: failed to create send CQ: %i\n", @@ -710,8 +702,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, } cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1; - recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall, - rpcrdma_cq_async_error_upcall, ep, &cq_attr); + recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall, + rpcrdma_cq_async_error_upcall, ep, &cq_attr); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); dprintk("RPC: %s: failed to create recv CQ: %i\n", @@ -817,8 +809,6 @@ retry: rpcrdma_flush_cqs(ep); xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); - ia->ri_ops->ro_reset(xprt); - id = rpcrdma_create_id(xprt, ia, (struct sockaddr *)&xprt->rx_data.addr); if (IS_ERR(id)) { @@ -832,7 +822,7 @@ retry: * More stuff I haven't thought of! * Rrrgh! */ - if (ia->ri_id->device != id->device) { + if (ia->ri_device != id->device) { printk("RPC: %s: can't reconnect on " "different device!\n", __func__); rdma_destroy_id(id); @@ -974,7 +964,8 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) goto out_free; } - rep->rr_buffer = &r_xprt->rx_buf; + rep->rr_device = ia->ri_device; + rep->rr_rxprt = r_xprt; return rep; out_free: @@ -1098,31 +1089,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) kfree(buf->rb_pool); } -/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving - * some req segments uninitialized. - */ -static void -rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf) +struct rpcrdma_mw * +rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) { - if (*mw) { - list_add_tail(&(*mw)->mw_list, &buf->rb_mws); - *mw = NULL; + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_mw *mw = NULL; + + spin_lock(&buf->rb_mwlock); + if (!list_empty(&buf->rb_mws)) { + mw = list_first_entry(&buf->rb_mws, + struct rpcrdma_mw, mw_list); + list_del_init(&mw->mw_list); } + spin_unlock(&buf->rb_mwlock); + + if (!mw) + pr_err("RPC: %s: no MWs available\n", __func__); + return mw; } -/* Cycle mw's back in reverse order, and "spin" them. - * This delays and scrambles reuse as much as possible. - */ -static void -rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) +void +rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) { - struct rpcrdma_mr_seg *seg = req->rl_segments; - struct rpcrdma_mr_seg *seg1 = seg; - int i; + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++) - rpcrdma_buffer_put_mr(&seg->rl_mw, buf); - rpcrdma_buffer_put_mr(&seg1->rl_mw, buf); + spin_lock(&buf->rb_mwlock); + list_add_tail(&mw->mw_list, &buf->rb_mws); + spin_unlock(&buf->rb_mwlock); } static void @@ -1132,115 +1125,10 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) req->rl_niovs = 0; if (req->rl_reply) { buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply; - req->rl_reply->rr_func = NULL; req->rl_reply = NULL; } } -/* rpcrdma_unmap_one() was already done during deregistration. - * Redo only the ib_post_send(). - */ -static void -rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia) -{ - struct rpcrdma_xprt *r_xprt = - container_of(ia, struct rpcrdma_xprt, rx_ia); - struct ib_send_wr invalidate_wr, *bad_wr; - int rc; - - dprintk("RPC: %s: FRMR %p is stale\n", __func__, r); - - /* When this FRMR is re-inserted into rb_mws, it is no longer stale */ - r->r.frmr.fr_state = FRMR_IS_INVALID; - - memset(&invalidate_wr, 0, sizeof(invalidate_wr)); - invalidate_wr.wr_id = (unsigned long)(void *)r; - invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey; - DECR_CQCOUNT(&r_xprt->rx_ep); - - dprintk("RPC: %s: frmr %p invalidating rkey %08x\n", - __func__, r, r->r.frmr.fr_mr->rkey); - - read_lock(&ia->ri_qplock); - rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); - read_unlock(&ia->ri_qplock); - if (rc) { - /* Force rpcrdma_buffer_get() to retry */ - r->r.frmr.fr_state = FRMR_IS_STALE; - dprintk("RPC: %s: ib_post_send failed, %i\n", - __func__, rc); - } -} - -static void -rpcrdma_retry_flushed_linv(struct list_head *stale, - struct rpcrdma_buffer *buf) -{ - struct rpcrdma_ia *ia = rdmab_to_ia(buf); - struct list_head *pos; - struct rpcrdma_mw *r; - unsigned long flags; - - list_for_each(pos, stale) { - r = list_entry(pos, struct rpcrdma_mw, mw_list); - rpcrdma_retry_local_inv(r, ia); - } - - spin_lock_irqsave(&buf->rb_lock, flags); - list_splice_tail(stale, &buf->rb_mws); - spin_unlock_irqrestore(&buf->rb_lock, flags); -} - -static struct rpcrdma_req * -rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf, - struct list_head *stale) -{ - struct rpcrdma_mw *r; - int i; - - i = RPCRDMA_MAX_SEGS - 1; - while (!list_empty(&buf->rb_mws)) { - r = list_entry(buf->rb_mws.next, - struct rpcrdma_mw, mw_list); - list_del(&r->mw_list); - if (r->r.frmr.fr_state == FRMR_IS_STALE) { - list_add(&r->mw_list, stale); - continue; - } - req->rl_segments[i].rl_mw = r; - if (unlikely(i-- == 0)) - return req; /* Success */ - } - - /* Not enough entries on rb_mws for this req */ - rpcrdma_buffer_put_sendbuf(req, buf); - rpcrdma_buffer_put_mrs(req, buf); - return NULL; -} - -static struct rpcrdma_req * -rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) -{ - struct rpcrdma_mw *r; - int i; - - i = RPCRDMA_MAX_SEGS - 1; - while (!list_empty(&buf->rb_mws)) { - r = list_entry(buf->rb_mws.next, - struct rpcrdma_mw, mw_list); - list_del(&r->mw_list); - req->rl_segments[i].rl_mw = r; - if (unlikely(i-- == 0)) - return req; /* Success */ - } - - /* Not enough entries on rb_mws for this req */ - rpcrdma_buffer_put_sendbuf(req, buf); - rpcrdma_buffer_put_mrs(req, buf); - return NULL; -} - /* * Get a set of request/reply buffers. * @@ -1253,12 +1141,11 @@ rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) struct rpcrdma_req * rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) { - struct rpcrdma_ia *ia = rdmab_to_ia(buffers); - struct list_head stale; struct rpcrdma_req *req; unsigned long flags; spin_lock_irqsave(&buffers->rb_lock, flags); + if (buffers->rb_send_index == buffers->rb_max_requests) { spin_unlock_irqrestore(&buffers->rb_lock, flags); dprintk("RPC: %s: out of request buffers\n", __func__); @@ -1277,20 +1164,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) } buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; - INIT_LIST_HEAD(&stale); - switch (ia->ri_memreg_strategy) { - case RPCRDMA_FRMR: - req = rpcrdma_buffer_get_frmrs(req, buffers, &stale); - break; - case RPCRDMA_MTHCAFMR: - req = rpcrdma_buffer_get_fmrs(req, buffers); - break; - default: - break; - } spin_unlock_irqrestore(&buffers->rb_lock, flags); - if (!list_empty(&stale)) - rpcrdma_retry_flushed_linv(&stale, buffers); return req; } @@ -1302,19 +1176,10 @@ void rpcrdma_buffer_put(struct rpcrdma_req *req) { struct rpcrdma_buffer *buffers = req->rl_buffer; - struct rpcrdma_ia *ia = rdmab_to_ia(buffers); unsigned long flags; spin_lock_irqsave(&buffers->rb_lock, flags); rpcrdma_buffer_put_sendbuf(req, buffers); - switch (ia->ri_memreg_strategy) { - case RPCRDMA_FRMR: - case RPCRDMA_MTHCAFMR: - rpcrdma_buffer_put_mrs(req, buffers); - break; - default: - break; - } spin_unlock_irqrestore(&buffers->rb_lock, flags); } @@ -1344,10 +1209,9 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) { - struct rpcrdma_buffer *buffers = rep->rr_buffer; + struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; unsigned long flags; - rep->rr_func = NULL; spin_lock_irqsave(&buffers->rb_lock, flags); buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep; spin_unlock_irqrestore(&buffers->rb_lock, flags); @@ -1376,9 +1240,9 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, /* * All memory passed here was kmalloc'ed, therefore phys-contiguous. */ - iov->addr = ib_dma_map_single(ia->ri_id->device, + iov->addr = ib_dma_map_single(ia->ri_device, va, len, DMA_BIDIRECTIONAL); - if (ib_dma_mapping_error(ia->ri_id->device, iov->addr)) + if (ib_dma_mapping_error(ia->ri_device, iov->addr)) return -ENOMEM; iov->length = len; @@ -1422,8 +1286,8 @@ rpcrdma_deregister_internal(struct rpcrdma_ia *ia, { int rc; - ib_dma_unmap_single(ia->ri_id->device, - iov->addr, iov->length, DMA_BIDIRECTIONAL); + ib_dma_unmap_single(ia->ri_device, + iov->addr, iov->length, DMA_BIDIRECTIONAL); if (NULL == mr) return 0; @@ -1516,15 +1380,18 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, send_wr.num_sge = req->rl_niovs; send_wr.opcode = IB_WR_SEND; if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */ - ib_dma_sync_single_for_device(ia->ri_id->device, - req->rl_send_iov[3].addr, req->rl_send_iov[3].length, - DMA_TO_DEVICE); - ib_dma_sync_single_for_device(ia->ri_id->device, - req->rl_send_iov[1].addr, req->rl_send_iov[1].length, - DMA_TO_DEVICE); - ib_dma_sync_single_for_device(ia->ri_id->device, - req->rl_send_iov[0].addr, req->rl_send_iov[0].length, - DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_device, + req->rl_send_iov[3].addr, + req->rl_send_iov[3].length, + DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_device, + req->rl_send_iov[1].addr, + req->rl_send_iov[1].length, + DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_device, + req->rl_send_iov[0].addr, + req->rl_send_iov[0].length, + DMA_TO_DEVICE); if (DECR_CQCOUNT(ep) > 0) send_wr.send_flags = 0; @@ -1557,7 +1424,7 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; recv_wr.num_sge = 1; - ib_dma_sync_single_for_cpu(ia->ri_id->device, + ib_dma_sync_single_for_cpu(ia->ri_device, rdmab_addr(rep->rr_rdmabuf), rdmab_length(rep->rr_rdmabuf), DMA_BIDIRECTIONAL); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 58163b88738c..f49dd8b38122 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -62,6 +62,7 @@ struct rpcrdma_ia { const struct rpcrdma_memreg_ops *ri_ops; rwlock_t ri_qplock; + struct ib_device *ri_device; struct rdma_cm_id *ri_id; struct ib_pd *ri_pd; struct ib_mr *ri_bind_mem; @@ -69,7 +70,6 @@ struct rpcrdma_ia { int ri_have_dma_lkey; struct completion ri_done; int ri_async_rc; - enum rpcrdma_memreg ri_memreg_strategy; unsigned int ri_max_frmr_depth; struct ib_device_attr ri_devattr; struct ib_qp_attr ri_qp_attr; @@ -173,9 +173,8 @@ struct rpcrdma_buffer; struct rpcrdma_rep { unsigned int rr_len; - struct rpcrdma_buffer *rr_buffer; - struct rpc_xprt *rr_xprt; - void (*rr_func)(struct rpcrdma_rep *); + struct ib_device *rr_device; + struct rpcrdma_xprt *rr_rxprt; struct list_head rr_list; struct rpcrdma_regbuf *rr_rdmabuf; }; @@ -203,11 +202,18 @@ struct rpcrdma_frmr { struct ib_fast_reg_page_list *fr_pgl; struct ib_mr *fr_mr; enum rpcrdma_frmr_state fr_state; + struct work_struct fr_work; + struct rpcrdma_xprt *fr_xprt; +}; + +struct rpcrdma_fmr { + struct ib_fmr *fmr; + u64 *physaddrs; }; struct rpcrdma_mw { union { - struct ib_fmr *fmr; + struct rpcrdma_fmr fmr; struct rpcrdma_frmr frmr; } r; void (*mw_sendcompletion)(struct ib_wc *); @@ -281,15 +287,17 @@ rpcr_to_rdmar(struct rpc_rqst *rqst) * One of these is associated with a transport instance */ struct rpcrdma_buffer { - spinlock_t rb_lock; /* protects indexes */ - u32 rb_max_requests;/* client max requests */ - struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ - struct list_head rb_all; - int rb_send_index; + spinlock_t rb_mwlock; /* protect rb_mws list */ + struct list_head rb_mws; + struct list_head rb_all; + char *rb_pool; + + spinlock_t rb_lock; /* protect buf arrays */ + u32 rb_max_requests; + int rb_send_index; + int rb_recv_index; struct rpcrdma_req **rb_send_bufs; - int rb_recv_index; struct rpcrdma_rep **rb_recv_bufs; - char *rb_pool; }; #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) @@ -350,7 +358,6 @@ struct rpcrdma_memreg_ops { struct rpcrdma_create_data_internal *); size_t (*ro_maxpages)(struct rpcrdma_xprt *); int (*ro_init)(struct rpcrdma_xprt *); - void (*ro_reset)(struct rpcrdma_xprt *); void (*ro_destroy)(struct rpcrdma_buffer *); const char *ro_displayname; }; @@ -413,6 +420,8 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); +struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); +void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); void rpcrdma_buffer_put(struct rpcrdma_req *); void rpcrdma_recv_buffer_get(struct rpcrdma_req *); @@ -425,6 +434,9 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *, unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); +int frwr_alloc_recovery_wq(void); +void frwr_destroy_recovery_wq(void); + /* * Wrappers for chunk registration, shared by read/write chunk code. */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b0517287075b..e193c2b5476b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -623,24 +623,6 @@ process_status: } /** - * xs_tcp_shutdown - gracefully shut down a TCP socket - * @xprt: transport - * - * Initiates a graceful shutdown of the TCP socket by calling the - * equivalent of shutdown(SHUT_RDWR); - */ -static void xs_tcp_shutdown(struct rpc_xprt *xprt) -{ - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct socket *sock = transport->sock; - - if (sock != NULL) { - kernel_sock_shutdown(sock, SHUT_RDWR); - trace_rpc_socket_shutdown(xprt, sock); - } -} - -/** * xs_tcp_send_request - write an RPC request to a TCP socket * @task: address of RPC task that manages the state of an RPC request * @@ -786,6 +768,7 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt) xs_sock_reset_connection_flags(xprt); /* Mark transport as closed and wake up all pending tasks */ xprt_disconnect_done(xprt); + xprt_force_disconnect(xprt); } /** @@ -827,6 +810,9 @@ static void xs_reset_transport(struct sock_xprt *transport) if (sk == NULL) return; + if (atomic_read(&transport->xprt.swapper)) + sk_clear_memalloc(sk); + write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; transport->sock = NULL; @@ -863,6 +849,13 @@ static void xs_close(struct rpc_xprt *xprt) xprt_disconnect_done(xprt); } +static void xs_inject_disconnect(struct rpc_xprt *xprt) +{ + dprintk("RPC: injecting transport disconnect on xprt=%p\n", + xprt); + xprt_disconnect_done(xprt); +} + static void xs_xprt_free(struct rpc_xprt *xprt) { xs_free_peer_addresses(xprt); @@ -901,7 +894,6 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) /** * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets * @sk: socket with data to read - * @len: how much data to read * * Currently this assumes we can read the whole reply in a single gulp. */ @@ -965,7 +957,6 @@ static void xs_local_data_ready(struct sock *sk) /** * xs_udp_data_ready - "data ready" callback for UDP sockets * @sk: socket with data to read - * @len: how much data to read * */ static void xs_udp_data_ready(struct sock *sk) @@ -1389,7 +1380,6 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns /** * xs_tcp_data_ready - "data ready" callback for TCP sockets * @sk: socket with data to read - * @bytes: how much data to read * */ static void xs_tcp_data_ready(struct sock *sk) @@ -1886,9 +1876,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, /** * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint - * @xprt: RPC transport to connect * @transport: socket transport to connect - * @create_sock: function to create a socket of the correct type */ static int xs_local_setup_socket(struct sock_xprt *transport) { @@ -1960,43 +1948,84 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) msleep_interruptible(15000); } -#ifdef CONFIG_SUNRPC_SWAP +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +/* + * Note that this should be called with XPRT_LOCKED held (or when we otherwise + * know that we have exclusive access to the socket), to guard against + * races with xs_reset_transport. + */ static void xs_set_memalloc(struct rpc_xprt *xprt) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - if (xprt->swapper) + /* + * If there's no sock, then we have nothing to set. The + * reconnecting process will get it for us. + */ + if (!transport->inet) + return; + if (atomic_read(&xprt->swapper)) sk_set_memalloc(transport->inet); } /** - * xs_swapper - Tag this transport as being used for swap. + * xs_enable_swap - Tag this transport as being used for swap. * @xprt: transport to tag - * @enable: enable/disable * + * Take a reference to this transport on behalf of the rpc_clnt, and + * optionally mark it for swapping if it wasn't already. */ -int xs_swapper(struct rpc_xprt *xprt, int enable) +static int +xs_enable_swap(struct rpc_xprt *xprt) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, - xprt); - int err = 0; + struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); - if (enable) { - xprt->swapper++; - xs_set_memalloc(xprt); - } else if (xprt->swapper) { - xprt->swapper--; - sk_clear_memalloc(transport->inet); - } + if (atomic_inc_return(&xprt->swapper) != 1) + return 0; + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) + return -ERESTARTSYS; + if (xs->inet) + sk_set_memalloc(xs->inet); + xprt_release_xprt(xprt, NULL); + return 0; +} - return err; +/** + * xs_disable_swap - Untag this transport as being used for swap. + * @xprt: transport to tag + * + * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the + * swapper refcount goes to 0, untag the socket as a memalloc socket. + */ +static void +xs_disable_swap(struct rpc_xprt *xprt) +{ + struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); + + if (!atomic_dec_and_test(&xprt->swapper)) + return; + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) + return; + if (xs->inet) + sk_clear_memalloc(xs->inet); + xprt_release_xprt(xprt, NULL); } -EXPORT_SYMBOL_GPL(xs_swapper); #else static void xs_set_memalloc(struct rpc_xprt *xprt) { } + +static int +xs_enable_swap(struct rpc_xprt *xprt) +{ + return -EINVAL; +} + +static void +xs_disable_swap(struct rpc_xprt *xprt) +{ +} #endif static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) @@ -2057,6 +2086,27 @@ out: xprt_wake_pending_tasks(xprt, status); } +/** + * xs_tcp_shutdown - gracefully shut down a TCP socket + * @xprt: transport + * + * Initiates a graceful shutdown of the TCP socket by calling the + * equivalent of shutdown(SHUT_RDWR); + */ +static void xs_tcp_shutdown(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct socket *sock = transport->sock; + + if (sock == NULL) + return; + if (xprt_connected(xprt)) { + kernel_sock_shutdown(sock, SHUT_RDWR); + trace_rpc_socket_shutdown(xprt, sock); + } else + xs_reset_transport(transport); +} + static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -2067,6 +2117,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) unsigned int keepidle = xprt->timeout->to_initval / HZ; unsigned int keepcnt = xprt->timeout->to_retries + 1; unsigned int opt_on = 1; + unsigned int timeo; /* TCP Keepalive options */ kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, @@ -2078,6 +2129,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keepcnt, sizeof(keepcnt)); + /* TCP user timeout (see RFC5482) */ + timeo = jiffies_to_msecs(xprt->timeout->to_initval) * + (xprt->timeout->to_retries + 1); + kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, + (char *)&timeo, sizeof(timeo)); + write_lock_bh(&sk->sk_callback_lock); xs_save_old_callbacks(transport, sk); @@ -2125,9 +2182,6 @@ out: /** * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint - * @xprt: RPC transport to connect - * @transport: socket transport to connect - * @create_sock: function to create a socket of the correct type * * Invoked by a work queue tasklet. */ @@ -2463,6 +2517,8 @@ static struct rpc_xprt_ops xs_local_ops = { .close = xs_close, .destroy = xs_destroy, .print_stats = xs_local_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, }; static struct rpc_xprt_ops xs_udp_ops = { @@ -2482,6 +2538,9 @@ static struct rpc_xprt_ops xs_udp_ops = { .close = xs_close, .destroy = xs_destroy, .print_stats = xs_udp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; static struct rpc_xprt_ops xs_tcp_ops = { @@ -2498,6 +2557,9 @@ static struct rpc_xprt_ops xs_tcp_ops = { .close = xs_tcp_shutdown, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; /* @@ -2515,6 +2577,9 @@ static struct rpc_xprt_ops bc_tcp_ops = { .close = bc_close, .destroy = bc_destroy, .print_stats = xs_tcp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; static int xs_init_anyaddr(const int family, struct sockaddr *sap) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index f52abae0ec5f..aceaaed09811 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -86,7 +86,7 @@ $(simple-targets): $(obj)/conf PHONY += oldnoconfig savedefconfig defconfig # oldnoconfig is an alias of olddefconfig, because people already are dependent -# on its behavior(sets new symbols to their default value but not 'n') with the +# on its behavior (sets new symbols to their default value but not 'n') with the # counter-intuitive name. oldnoconfig: olddefconfig @@ -126,10 +126,11 @@ tinyconfig: # Help text used by make help help: @echo ' config - Update current config utilising a line-oriented program' - @echo ' nconfig - Update current config utilising a ncurses menu based program' + @echo ' nconfig - Update current config utilising a ncurses menu based' + @echo ' program' @echo ' menuconfig - Update current config utilising a menu based program' - @echo ' xconfig - Update current config utilising a QT based front-end' - @echo ' gconfig - Update current config utilising a GTK based front-end' + @echo ' xconfig - Update current config utilising a Qt based front-end' + @echo ' gconfig - Update current config utilising a GTK+ based front-end' @echo ' oldconfig - Update current config utilising a provided .config as base' @echo ' localmodconfig - Update current config disabling modules not loaded' @echo ' localyesconfig - Update current config converting local mods to core' @@ -142,7 +143,8 @@ help: @echo ' alldefconfig - New config with all symbols set to default' @echo ' randconfig - New config with random answer to all options' @echo ' listnewconfig - List new options' - @echo ' olddefconfig - Same as silentoldconfig but sets new symbols to their default value' + @echo ' olddefconfig - Same as silentoldconfig but sets new symbols to their' + @echo ' default value' @echo ' kvmconfig - Enable additional options for kvm guest kernel support' @echo ' xenconfig - Enable additional options for xen dom0 and guest kernel support' @echo ' tinyconfig - Configure the tiniest possible kernel' @@ -163,9 +165,9 @@ HOST_EXTRACFLAGS += $(shell $(CONFIG_SHELL) $(check-lxdialog) -ccflags) \ # mconf: Used for the menuconfig target # Utilizes the lxdialog package # qconf: Used for the xconfig target -# Based on QT which needs to be installed to compile it +# Based on Qt which needs to be installed to compile it # gconf: Used for the gconfig target -# Based on GTK which needs to be installed to compile it +# Based on GTK+ which needs to be installed to compile it # object files used by all kconfig flavours lxdialog := lxdialog/checklist.o lxdialog/util.o lxdialog/inputbox.o @@ -222,11 +224,11 @@ ifeq ($(MAKECMDGOALS),xconfig) $(obj)/.tmp_qtcheck: $(src)/Makefile -include $(obj)/.tmp_qtcheck -# QT needs some extra effort... +# Qt needs some extra effort... $(obj)/.tmp_qtcheck: @set -e; $(kecho) " CHECK qt"; dir=""; pkg=""; \ if ! pkg-config --exists QtCore 2> /dev/null; then \ - echo "* Unable to find the QT4 tool qmake. Trying to use QT3"; \ + echo "* Unable to find the Qt4 tool qmake. Trying to use Qt3"; \ pkg-config --exists qt 2> /dev/null && pkg=qt; \ pkg-config --exists qt-mt 2> /dev/null && pkg=qt-mt; \ if [ -n "$$pkg" ]; then \ @@ -240,8 +242,8 @@ $(obj)/.tmp_qtcheck: done; \ if [ -z "$$dir" ]; then \ echo >&2 "*"; \ - echo >&2 "* Unable to find any QT installation. Please make sure that"; \ - echo >&2 "* the QT4 or QT3 development package is correctly installed and"; \ + echo >&2 "* Unable to find any Qt installation. Please make sure that"; \ + echo >&2 "* the Qt4 or Qt3 development package is correctly installed and"; \ echo >&2 "* either qmake can be found or install pkg-config or set"; \ echo >&2 "* the QTDIR environment variable to the correct location."; \ echo >&2 "*"; \ @@ -278,7 +280,7 @@ $(obj)/gconf.o: $(obj)/.tmp_gtkcheck ifeq ($(MAKECMDGOALS),gconfig) -include $(obj)/.tmp_gtkcheck -# GTK needs some extra effort, too... +# GTK+ needs some extra effort, too... $(obj)/.tmp_gtkcheck: @if `pkg-config --exists gtk+-2.0 gmodule-2.0 libglade-2.0`; then \ if `pkg-config --atleast-version=2.0.0 gtk+-2.0`; then \ @@ -309,7 +311,7 @@ quiet_cmd_moc = MOC $@ $(obj)/%.moc: $(src)/%.h $(obj)/.tmp_qtcheck $(call cmd,moc) -# Extract gconf menu items for I18N support +# Extract gconf menu items for i18n support $(obj)/gconf.glade.h: $(obj)/gconf.glade $(Q)intltool-extract --type=gettext/glade --srcdir=$(srctree) \ $(obj)/gconf.glade diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c index fb0a2a286dca..667d1aa23711 100644 --- a/scripts/kconfig/expr.c +++ b/scripts/kconfig/expr.c @@ -13,9 +13,6 @@ static int expr_eq(struct expr *e1, struct expr *e2); static struct expr *expr_eliminate_yn(struct expr *e); -static struct expr *expr_extract_eq_and(struct expr **ep1, struct expr **ep2); -static struct expr *expr_extract_eq_or(struct expr **ep1, struct expr **ep2); -static void expr_extract_eq(enum expr_type type, struct expr **ep, struct expr **ep1, struct expr **ep2); struct expr *expr_alloc_symbol(struct symbol *sym) { @@ -82,6 +79,10 @@ struct expr *expr_copy(const struct expr *org) e->left.expr = expr_copy(org->left.expr); break; case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: e->left.sym = org->left.sym; e->right.sym = org->right.sym; @@ -114,6 +115,10 @@ void expr_free(struct expr *e) expr_free(e->left.expr); return; case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: break; case E_OR: @@ -200,6 +205,10 @@ static int expr_eq(struct expr *e1, struct expr *e2) return 0; switch (e1->type) { case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: return e1->left.sym == e2->left.sym && e1->right.sym == e2->right.sym; case E_SYMBOL: @@ -559,62 +568,6 @@ static void expr_eliminate_dups1(enum expr_type type, struct expr **ep1, struct #undef e2 } -static void expr_eliminate_dups2(enum expr_type type, struct expr **ep1, struct expr **ep2) -{ -#define e1 (*ep1) -#define e2 (*ep2) - struct expr *tmp, *tmp1, *tmp2; - - if (e1->type == type) { - expr_eliminate_dups2(type, &e1->left.expr, &e2); - expr_eliminate_dups2(type, &e1->right.expr, &e2); - return; - } - if (e2->type == type) { - expr_eliminate_dups2(type, &e1, &e2->left.expr); - expr_eliminate_dups2(type, &e1, &e2->right.expr); - } - if (e1 == e2) - return; - - switch (e1->type) { - case E_OR: - expr_eliminate_dups2(e1->type, &e1, &e1); - // (FOO || BAR) && (!FOO && !BAR) -> n - tmp1 = expr_transform(expr_alloc_one(E_NOT, expr_copy(e1))); - tmp2 = expr_copy(e2); - tmp = expr_extract_eq_and(&tmp1, &tmp2); - if (expr_is_yes(tmp1)) { - expr_free(e1); - e1 = expr_alloc_symbol(&symbol_no); - trans_count++; - } - expr_free(tmp2); - expr_free(tmp1); - expr_free(tmp); - break; - case E_AND: - expr_eliminate_dups2(e1->type, &e1, &e1); - // (FOO && BAR) || (!FOO || !BAR) -> y - tmp1 = expr_transform(expr_alloc_one(E_NOT, expr_copy(e1))); - tmp2 = expr_copy(e2); - tmp = expr_extract_eq_or(&tmp1, &tmp2); - if (expr_is_no(tmp1)) { - expr_free(e1); - e1 = expr_alloc_symbol(&symbol_yes); - trans_count++; - } - expr_free(tmp2); - expr_free(tmp1); - expr_free(tmp); - break; - default: - ; - } -#undef e1 -#undef e2 -} - struct expr *expr_eliminate_dups(struct expr *e) { int oldcount; @@ -627,7 +580,6 @@ struct expr *expr_eliminate_dups(struct expr *e) switch (e->type) { case E_OR: case E_AND: expr_eliminate_dups1(e->type, &e, &e); - expr_eliminate_dups2(e->type, &e, &e); default: ; } @@ -647,6 +599,10 @@ struct expr *expr_transform(struct expr *e) return NULL; switch (e->type) { case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: case E_SYMBOL: case E_LIST: @@ -719,6 +675,22 @@ struct expr *expr_transform(struct expr *e) e = tmp; e->type = e->type == E_EQUAL ? E_UNEQUAL : E_EQUAL; break; + case E_LEQ: + case E_GEQ: + // !a<='x' -> a>'x' + tmp = e->left.expr; + free(e); + e = tmp; + e->type = e->type == E_LEQ ? E_GTH : E_LTH; + break; + case E_LTH: + case E_GTH: + // !a<'x' -> a>='x' + tmp = e->left.expr; + free(e); + e = tmp; + e->type = e->type == E_LTH ? E_GEQ : E_LEQ; + break; case E_OR: // !(a || b) -> !a && !b tmp = e->left.expr; @@ -789,6 +761,10 @@ int expr_contains_symbol(struct expr *dep, struct symbol *sym) case E_SYMBOL: return dep->left.sym == sym; case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: return dep->left.sym == sym || dep->right.sym == sym; @@ -829,57 +805,6 @@ bool expr_depends_symbol(struct expr *dep, struct symbol *sym) return false; } -static struct expr *expr_extract_eq_and(struct expr **ep1, struct expr **ep2) -{ - struct expr *tmp = NULL; - expr_extract_eq(E_AND, &tmp, ep1, ep2); - if (tmp) { - *ep1 = expr_eliminate_yn(*ep1); - *ep2 = expr_eliminate_yn(*ep2); - } - return tmp; -} - -static struct expr *expr_extract_eq_or(struct expr **ep1, struct expr **ep2) -{ - struct expr *tmp = NULL; - expr_extract_eq(E_OR, &tmp, ep1, ep2); - if (tmp) { - *ep1 = expr_eliminate_yn(*ep1); - *ep2 = expr_eliminate_yn(*ep2); - } - return tmp; -} - -static void expr_extract_eq(enum expr_type type, struct expr **ep, struct expr **ep1, struct expr **ep2) -{ -#define e1 (*ep1) -#define e2 (*ep2) - if (e1->type == type) { - expr_extract_eq(type, ep, &e1->left.expr, &e2); - expr_extract_eq(type, ep, &e1->right.expr, &e2); - return; - } - if (e2->type == type) { - expr_extract_eq(type, ep, ep1, &e2->left.expr); - expr_extract_eq(type, ep, ep1, &e2->right.expr); - return; - } - if (expr_eq(e1, e2)) { - *ep = *ep ? expr_alloc_two(type, *ep, e1) : e1; - expr_free(e2); - if (type == E_AND) { - e1 = expr_alloc_symbol(&symbol_yes); - e2 = expr_alloc_symbol(&symbol_yes); - } else if (type == E_OR) { - e1 = expr_alloc_symbol(&symbol_no); - e2 = expr_alloc_symbol(&symbol_no); - } - } -#undef e1 -#undef e2 -} - struct expr *expr_trans_compare(struct expr *e, enum expr_type type, struct symbol *sym) { struct expr *e1, *e2; @@ -914,6 +839,10 @@ struct expr *expr_trans_compare(struct expr *e, enum expr_type type, struct symb case E_NOT: return expr_trans_compare(e->left.expr, type == E_EQUAL ? E_UNEQUAL : E_EQUAL, sym); case E_UNEQUAL: + case E_LTH: + case E_LEQ: + case E_GTH: + case E_GEQ: case E_EQUAL: if (type == E_EQUAL) { if (sym == &symbol_yes) @@ -941,10 +870,57 @@ struct expr *expr_trans_compare(struct expr *e, enum expr_type type, struct symb return NULL; } +enum string_value_kind { + k_string, + k_signed, + k_unsigned, + k_invalid +}; + +union string_value { + unsigned long long u; + signed long long s; +}; + +static enum string_value_kind expr_parse_string(const char *str, + enum symbol_type type, + union string_value *val) +{ + char *tail; + enum string_value_kind kind; + + errno = 0; + switch (type) { + case S_BOOLEAN: + case S_TRISTATE: + return k_string; + case S_INT: + val->s = strtoll(str, &tail, 10); + kind = k_signed; + break; + case S_HEX: + val->u = strtoull(str, &tail, 16); + kind = k_unsigned; + break; + case S_STRING: + case S_UNKNOWN: + val->s = strtoll(str, &tail, 0); + kind = k_signed; + break; + default: + return k_invalid; + } + return !errno && !*tail && tail > str && isxdigit(tail[-1]) + ? kind : k_string; +} + tristate expr_calc_value(struct expr *e) { tristate val1, val2; const char *str1, *str2; + enum string_value_kind k1 = k_string, k2 = k_string; + union string_value lval = {}, rval = {}; + int res; if (!e) return yes; @@ -965,21 +941,57 @@ tristate expr_calc_value(struct expr *e) val1 = expr_calc_value(e->left.expr); return EXPR_NOT(val1); case E_EQUAL: - sym_calc_value(e->left.sym); - sym_calc_value(e->right.sym); - str1 = sym_get_string_value(e->left.sym); - str2 = sym_get_string_value(e->right.sym); - return !strcmp(str1, str2) ? yes : no; + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: - sym_calc_value(e->left.sym); - sym_calc_value(e->right.sym); - str1 = sym_get_string_value(e->left.sym); - str2 = sym_get_string_value(e->right.sym); - return !strcmp(str1, str2) ? no : yes; + break; default: printf("expr_calc_value: %d?\n", e->type); return no; } + + sym_calc_value(e->left.sym); + sym_calc_value(e->right.sym); + str1 = sym_get_string_value(e->left.sym); + str2 = sym_get_string_value(e->right.sym); + + if (e->left.sym->type != S_STRING || e->right.sym->type != S_STRING) { + k1 = expr_parse_string(str1, e->left.sym->type, &lval); + k2 = expr_parse_string(str2, e->right.sym->type, &rval); + } + + if (k1 == k_string || k2 == k_string) + res = strcmp(str1, str2); + else if (k1 == k_invalid || k2 == k_invalid) { + if (e->type != E_EQUAL && e->type != E_UNEQUAL) { + printf("Cannot compare \"%s\" and \"%s\"\n", str1, str2); + return no; + } + res = strcmp(str1, str2); + } else if (k1 == k_unsigned || k2 == k_unsigned) + res = (lval.u > rval.u) - (lval.u < rval.u); + else /* if (k1 == k_signed && k2 == k_signed) */ + res = (lval.s > rval.s) - (lval.s < rval.s); + + switch(e->type) { + case E_EQUAL: + return res ? no : yes; + case E_GEQ: + return res >= 0 ? yes : no; + case E_GTH: + return res > 0 ? yes : no; + case E_LEQ: + return res <= 0 ? yes : no; + case E_LTH: + return res < 0 ? yes : no; + case E_UNEQUAL: + return res ? yes : no; + default: + printf("expr_calc_value: relation %d?\n", e->type); + return no; + } } static int expr_compare_type(enum expr_type t1, enum expr_type t2) @@ -987,6 +999,12 @@ static int expr_compare_type(enum expr_type t1, enum expr_type t2) if (t1 == t2) return 0; switch (t1) { + case E_LEQ: + case E_LTH: + case E_GEQ: + case E_GTH: + if (t2 == E_EQUAL || t2 == E_UNEQUAL) + return 1; case E_EQUAL: case E_UNEQUAL: if (t2 == E_NOT) @@ -1080,6 +1098,24 @@ void expr_print(struct expr *e, void (*fn)(void *, struct symbol *, const char * fn(data, NULL, "="); fn(data, e->right.sym, e->right.sym->name); break; + case E_LEQ: + case E_LTH: + if (e->left.sym->name) + fn(data, e->left.sym, e->left.sym->name); + else + fn(data, NULL, "<choice>"); + fn(data, NULL, e->type == E_LEQ ? "<=" : "<"); + fn(data, e->right.sym, e->right.sym->name); + break; + case E_GEQ: + case E_GTH: + if (e->left.sym->name) + fn(data, e->left.sym, e->left.sym->name); + else + fn(data, NULL, "<choice>"); + fn(data, NULL, e->type == E_LEQ ? ">=" : ">"); + fn(data, e->right.sym, e->right.sym->name); + break; case E_UNEQUAL: if (e->left.sym->name) fn(data, e->left.sym, e->left.sym->name); diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h index a2fc96a2bd2c..973b6f733368 100644 --- a/scripts/kconfig/expr.h +++ b/scripts/kconfig/expr.h @@ -29,7 +29,9 @@ typedef enum tristate { } tristate; enum expr_type { - E_NONE, E_OR, E_AND, E_NOT, E_EQUAL, E_UNEQUAL, E_LIST, E_SYMBOL, E_RANGE + E_NONE, E_OR, E_AND, E_NOT, + E_EQUAL, E_UNEQUAL, E_LTH, E_LEQ, E_GTH, E_GEQ, + E_LIST, E_SYMBOL, E_RANGE }; union expr_data { diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 6731377f9bb2..70c5ee189dce 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -1166,6 +1166,10 @@ static struct symbol *sym_check_expr_deps(struct expr *e) case E_NOT: return sym_check_expr_deps(e->left.expr); case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: sym = sym_check_deps(e->left.sym); if (sym) diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index 6c62d93b4ffb..200a3fe30091 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -122,6 +122,10 @@ n [A-Za-z0-9_] "!" return T_NOT; "=" return T_EQUAL; "!=" return T_UNEQUAL; + "<=" return T_LESS_EQUAL; + ">=" return T_GREATER_EQUAL; + "<" return T_LESS; + ">" return T_GREATER; \"|\' { str = yytext[0]; new_string(); @@ -141,7 +145,12 @@ n [A-Za-z0-9_] } #.* /* comment */ \\\n current_file->lineno++; - . + [[:blank:]]+ + . { + fprintf(stderr, + "%s:%d:warning: ignoring unsupported character '%c'\n", + zconf_curname(), zconf_lineno(), *yytext); + } <<EOF>> { BEGIN(INITIAL); } diff --git a/scripts/kconfig/zconf.lex.c_shipped b/scripts/kconfig/zconf.lex.c_shipped index 349a7f24315b..dd4e86c82521 100644 --- a/scripts/kconfig/zconf.lex.c_shipped +++ b/scripts/kconfig/zconf.lex.c_shipped @@ -365,323 +365,354 @@ int zconflineno = 1; extern char *zconftext; #define yytext_ptr zconftext -static yyconst flex_int16_t yy_nxt[][17] = +static yyconst flex_int16_t yy_nxt[][19] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 11, 12, 13, 14, 12, 12, 15, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12 + 12, 12, 12, 12, 12, 12, 12, 12, 12 }, { 11, 12, 13, 14, 12, 12, 15, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12 + 12, 12, 12, 12, 12, 12, 12, 12, 12 }, { 11, 16, 16, 17, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 18, 16, 16, 16 + 16, 16, 16, 18, 16, 16, 16, 16, 16 }, { 11, 16, 16, 17, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 18, 16, 16, 16 + 16, 16, 16, 18, 16, 16, 16, 16, 16 }, { 11, 19, 20, 21, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19 + 19, 19, 19, 19, 19, 19, 19, 19, 19 }, { 11, 19, 20, 21, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19 + 19, 19, 19, 19, 19, 19, 19, 19, 19 }, { 11, 22, 22, 23, 22, 24, 22, 22, 24, 22, - 22, 22, 22, 22, 22, 25, 22 + 22, 22, 22, 22, 22, 22, 22, 25, 22 }, { 11, 22, 22, 23, 22, 24, 22, 22, 24, 22, - 22, 22, 22, 22, 22, 25, 22 + 22, 22, 22, 22, 22, 22, 22, 25, 22 }, { - 11, 26, 26, 27, 28, 29, 30, 31, 29, 32, - 33, 34, 35, 35, 36, 37, 38 + 11, 26, 27, 28, 29, 30, 31, 32, 30, 33, + 34, 35, 36, 36, 37, 38, 39, 40, 41 }, { - 11, 26, 26, 27, 28, 29, 30, 31, 29, 32, - 33, 34, 35, 35, 36, 37, 38 + 11, 26, 27, 28, 29, 30, 31, 32, 30, 33, + 34, 35, 36, 36, 37, 38, 39, 40, 41 }, { -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, - -11, -11, -11, -11, -11, -11, -11 + -11, -11, -11, -11, -11, -11, -11, -11, -11 }, { 11, -12, -12, -12, -12, -12, -12, -12, -12, -12, - -12, -12, -12, -12, -12, -12, -12 + -12, -12, -12, -12, -12, -12, -12, -12, -12 }, { - 11, -13, 39, 40, -13, -13, 41, -13, -13, -13, - -13, -13, -13, -13, -13, -13, -13 + 11, -13, 42, 43, -13, -13, 44, -13, -13, -13, + -13, -13, -13, -13, -13, -13, -13, -13, -13 }, { 11, -14, -14, -14, -14, -14, -14, -14, -14, -14, - -14, -14, -14, -14, -14, -14, -14 + -14, -14, -14, -14, -14, -14, -14, -14, -14 }, { - 11, 42, 42, 43, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42 + 11, 45, 45, 46, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45 }, { 11, -16, -16, -16, -16, -16, -16, -16, -16, -16, - -16, -16, -16, -16, -16, -16, -16 + -16, -16, -16, -16, -16, -16, -16, -16, -16 }, { 11, -17, -17, -17, -17, -17, -17, -17, -17, -17, - -17, -17, -17, -17, -17, -17, -17 + -17, -17, -17, -17, -17, -17, -17, -17, -17 }, { 11, -18, -18, -18, -18, -18, -18, -18, -18, -18, - -18, -18, -18, 44, -18, -18, -18 + -18, -18, -18, 47, -18, -18, -18, -18, -18 }, { - 11, 45, 45, -19, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45 + 11, 48, 48, -19, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48 }, { - 11, -20, 46, 47, -20, -20, -20, -20, -20, -20, - -20, -20, -20, -20, -20, -20, -20 + 11, -20, 49, 50, -20, -20, -20, -20, -20, -20, + -20, -20, -20, -20, -20, -20, -20, -20, -20 }, { - 11, 48, -21, -21, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48 + 11, 51, -21, -21, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51 }, { - 11, 49, 49, 50, 49, -22, 49, 49, -22, 49, - 49, 49, 49, 49, 49, -22, 49 + 11, 52, 52, 53, 52, -22, 52, 52, -22, 52, + 52, 52, 52, 52, 52, 52, 52, -22, 52 }, { 11, -23, -23, -23, -23, -23, -23, -23, -23, -23, - -23, -23, -23, -23, -23, -23, -23 + -23, -23, -23, -23, -23, -23, -23, -23, -23 }, { 11, -24, -24, -24, -24, -24, -24, -24, -24, -24, - -24, -24, -24, -24, -24, -24, -24 + -24, -24, -24, -24, -24, -24, -24, -24, -24 }, { - 11, 51, 51, 52, 51, 51, 51, 51, 51, 51, - 51, 51, 51, 51, 51, 51, 51 + 11, 54, 54, 55, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54 }, { 11, -26, -26, -26, -26, -26, -26, -26, -26, -26, - -26, -26, -26, -26, -26, -26, -26 + -26, -26, -26, -26, -26, -26, -26, -26, -26 }, { - 11, -27, -27, -27, -27, -27, -27, -27, -27, -27, - -27, -27, -27, -27, -27, -27, -27 + 11, -27, 56, -27, -27, -27, -27, -27, -27, -27, + -27, -27, -27, -27, -27, -27, -27, -27, -27 }, { 11, -28, -28, -28, -28, -28, -28, -28, -28, -28, - -28, -28, -28, -28, 53, -28, -28 + -28, -28, -28, -28, -28, -28, -28, -28, -28 }, { 11, -29, -29, -29, -29, -29, -29, -29, -29, -29, - -29, -29, -29, -29, -29, -29, -29 + -29, -29, -29, -29, -29, 57, -29, -29, -29 }, { - 11, 54, 54, -30, 54, 54, 54, 54, 54, 54, - 54, 54, 54, 54, 54, 54, 54 + 11, -30, -30, -30, -30, -30, -30, -30, -30, -30, + -30, -30, -30, -30, -30, -30, -30, -30, -30 }, { - 11, -31, -31, -31, -31, -31, -31, 55, -31, -31, - -31, -31, -31, -31, -31, -31, -31 + 11, 58, 58, -31, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58 }, { - 11, -32, -32, -32, -32, -32, -32, -32, -32, -32, - -32, -32, -32, -32, -32, -32, -32 + 11, -32, -32, -32, -32, -32, -32, 59, -32, -32, + -32, -32, -32, -32, -32, -32, -32, -32, -32 }, { 11, -33, -33, -33, -33, -33, -33, -33, -33, -33, - -33, -33, -33, -33, -33, -33, -33 + -33, -33, -33, -33, -33, -33, -33, -33, -33 }, { 11, -34, -34, -34, -34, -34, -34, -34, -34, -34, - -34, 56, 57, 57, -34, -34, -34 + -34, -34, -34, -34, -34, -34, -34, -34, -34 }, { 11, -35, -35, -35, -35, -35, -35, -35, -35, -35, - -35, 57, 57, 57, -35, -35, -35 + -35, 60, 61, 61, -35, -35, -35, -35, -35 }, { 11, -36, -36, -36, -36, -36, -36, -36, -36, -36, - -36, -36, -36, -36, -36, -36, -36 + -36, 61, 61, 61, -36, -36, -36, -36, -36 }, { - 11, -37, -37, 58, -37, -37, -37, -37, -37, -37, - -37, -37, -37, -37, -37, -37, -37 + 11, -37, -37, -37, -37, -37, -37, -37, -37, -37, + -37, -37, -37, -37, -37, 62, -37, -37, -37 }, { 11, -38, -38, -38, -38, -38, -38, -38, -38, -38, - -38, -38, -38, -38, -38, -38, 59 + -38, -38, -38, -38, -38, -38, -38, -38, -38 }, { - 11, -39, 39, 40, -39, -39, 41, -39, -39, -39, - -39, -39, -39, -39, -39, -39, -39 + 11, -39, -39, -39, -39, -39, -39, -39, -39, -39, + -39, -39, -39, -39, -39, 63, -39, -39, -39 }, { - 11, -40, -40, -40, -40, -40, -40, -40, -40, -40, - -40, -40, -40, -40, -40, -40, -40 + 11, -40, -40, 64, -40, -40, -40, -40, -40, -40, + -40, -40, -40, -40, -40, -40, -40, -40, -40 }, { - 11, 42, 42, 43, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42 + 11, -41, -41, -41, -41, -41, -41, -41, -41, -41, + -41, -41, -41, -41, -41, -41, -41, -41, 65 }, { - 11, 42, 42, 43, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42 + 11, -42, 42, 43, -42, -42, 44, -42, -42, -42, + -42, -42, -42, -42, -42, -42, -42, -42, -42 }, { 11, -43, -43, -43, -43, -43, -43, -43, -43, -43, - -43, -43, -43, -43, -43, -43, -43 + -43, -43, -43, -43, -43, -43, -43, -43, -43 }, { - 11, -44, -44, -44, -44, -44, -44, -44, -44, -44, - -44, -44, -44, 44, -44, -44, -44 + 11, 45, 45, 46, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45 }, { - 11, 45, 45, -45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45 + 11, 45, 45, 46, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45 }, { - 11, -46, 46, 47, -46, -46, -46, -46, -46, -46, - -46, -46, -46, -46, -46, -46, -46 + 11, -46, -46, -46, -46, -46, -46, -46, -46, -46, + -46, -46, -46, -46, -46, -46, -46, -46, -46 }, { - 11, 48, -47, -47, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48 + 11, -47, -47, -47, -47, -47, -47, -47, -47, -47, + -47, -47, -47, 47, -47, -47, -47, -47, -47 }, { - 11, -48, -48, -48, -48, -48, -48, -48, -48, -48, - -48, -48, -48, -48, -48, -48, -48 + 11, 48, 48, -48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48 }, { - 11, 49, 49, 50, 49, -49, 49, 49, -49, 49, - 49, 49, 49, 49, 49, -49, 49 + 11, -49, 49, 50, -49, -49, -49, -49, -49, -49, + -49, -49, -49, -49, -49, -49, -49, -49, -49 }, { - 11, -50, -50, -50, -50, -50, -50, -50, -50, -50, - -50, -50, -50, -50, -50, -50, -50 + 11, 51, -50, -50, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51 }, { - 11, -51, -51, 52, -51, -51, -51, -51, -51, -51, - -51, -51, -51, -51, -51, -51, -51 + 11, -51, -51, -51, -51, -51, -51, -51, -51, -51, + -51, -51, -51, -51, -51, -51, -51, -51, -51 }, { - 11, -52, -52, -52, -52, -52, -52, -52, -52, -52, - -52, -52, -52, -52, -52, -52, -52 + 11, 52, 52, 53, 52, -52, 52, 52, -52, 52, + 52, 52, 52, 52, 52, 52, 52, -52, 52 }, { 11, -53, -53, -53, -53, -53, -53, -53, -53, -53, - -53, -53, -53, -53, -53, -53, -53 + -53, -53, -53, -53, -53, -53, -53, -53, -53 }, { - 11, 54, 54, -54, 54, 54, 54, 54, 54, 54, - 54, 54, 54, 54, 54, 54, 54 + 11, -54, -54, 55, -54, -54, -54, -54, -54, -54, + -54, -54, -54, -54, -54, -54, -54, -54, -54 }, { 11, -55, -55, -55, -55, -55, -55, -55, -55, -55, - -55, -55, -55, -55, -55, -55, -55 + -55, -55, -55, -55, -55, -55, -55, -55, -55 }, { - 11, -56, -56, -56, -56, -56, -56, -56, -56, -56, - -56, 60, 57, 57, -56, -56, -56 + 11, -56, 56, -56, -56, -56, -56, -56, -56, -56, + -56, -56, -56, -56, -56, -56, -56, -56, -56 }, { 11, -57, -57, -57, -57, -57, -57, -57, -57, -57, - -57, 57, 57, 57, -57, -57, -57 + -57, -57, -57, -57, -57, -57, -57, -57, -57 }, { - 11, -58, -58, -58, -58, -58, -58, -58, -58, -58, - -58, -58, -58, -58, -58, -58, -58 + 11, 58, 58, -58, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58 }, { 11, -59, -59, -59, -59, -59, -59, -59, -59, -59, - -59, -59, -59, -59, -59, -59, -59 + -59, -59, -59, -59, -59, -59, -59, -59, -59 }, { 11, -60, -60, -60, -60, -60, -60, -60, -60, -60, - -60, 57, 57, 57, -60, -60, -60 + -60, 66, 61, 61, -60, -60, -60, -60, -60 + }, + + { + 11, -61, -61, -61, -61, -61, -61, -61, -61, -61, + -61, 61, 61, 61, -61, -61, -61, -61, -61 + }, + + { + 11, -62, -62, -62, -62, -62, -62, -62, -62, -62, + -62, -62, -62, -62, -62, -62, -62, -62, -62 + }, + + { + 11, -63, -63, -63, -63, -63, -63, -63, -63, -63, + -63, -63, -63, -63, -63, -63, -63, -63, -63 + }, + + { + 11, -64, -64, -64, -64, -64, -64, -64, -64, -64, + -64, -64, -64, -64, -64, -64, -64, -64, -64 + + }, + + { + 11, -65, -65, -65, -65, -65, -65, -65, -65, -65, + -65, -65, -65, -65, -65, -65, -65, -65, -65 + }, + + { + 11, -66, -66, -66, -66, -66, -66, -66, -66, -66, + -66, 61, 61, 61, -66, -66, -66, -66, -66 }, } ; @@ -701,8 +732,8 @@ static void yy_fatal_error (yyconst char msg[] ); *yy_cp = '\0'; \ (yy_c_buf_p) = yy_cp; -#define YY_NUM_RULES 33 -#define YY_END_OF_BUFFER 34 +#define YY_NUM_RULES 38 +#define YY_END_OF_BUFFER 39 /* This struct is not used in this scanner, but its presence is necessary. */ struct yy_trans_info @@ -710,14 +741,15 @@ struct yy_trans_info flex_int32_t yy_verify; flex_int32_t yy_nxt; }; -static yyconst flex_int16_t yy_accept[61] = +static yyconst flex_int16_t yy_accept[67] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 34, 5, 4, 2, 3, 7, 8, 6, 32, 29, - 31, 24, 28, 27, 26, 22, 17, 13, 16, 20, - 22, 11, 12, 19, 19, 14, 22, 22, 4, 2, - 3, 3, 1, 6, 32, 29, 31, 30, 24, 23, - 26, 25, 15, 20, 9, 19, 19, 21, 10, 18 + 39, 5, 4, 2, 3, 7, 8, 6, 37, 34, + 36, 29, 33, 32, 31, 27, 26, 21, 13, 20, + 24, 27, 11, 12, 23, 23, 18, 14, 19, 27, + 27, 4, 2, 3, 3, 1, 6, 37, 34, 36, + 35, 29, 28, 31, 30, 26, 15, 24, 9, 23, + 23, 16, 17, 25, 10, 22 } ; static yyconst flex_int32_t yy_ec[256] = @@ -727,15 +759,15 @@ static yyconst flex_int32_t yy_ec[256] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 5, 6, 1, 1, 7, 8, 9, 10, 1, 1, 1, 11, 12, 12, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 1, 1, 1, - 14, 1, 1, 1, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 1, 1, 14, + 15, 16, 1, 1, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 1, 15, 1, 1, 13, 1, 13, 13, 13, 13, + 1, 17, 1, 1, 13, 1, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 1, 16, 1, 1, 1, 1, 1, 1, + 13, 13, 1, 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -920,7 +952,7 @@ static int input (void ); /* This used to be an fputs(), but since the string might contain NUL's, * we now use fwrite(). */ -#define ECHO do { if (fwrite( zconftext, zconfleng, 1, zconfout )) {} } while (0) +#define ECHO fwrite( zconftext, zconfleng, 1, zconfout ) #endif /* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, @@ -1142,22 +1174,38 @@ return T_UNEQUAL; YY_BREAK case 16: YY_RULE_SETUP +return T_LESS_EQUAL; + YY_BREAK +case 17: +YY_RULE_SETUP +return T_GREATER_EQUAL; + YY_BREAK +case 18: +YY_RULE_SETUP +return T_LESS; + YY_BREAK +case 19: +YY_RULE_SETUP +return T_GREATER; + YY_BREAK +case 20: +YY_RULE_SETUP { str = zconftext[0]; new_string(); BEGIN(STRING); } YY_BREAK -case 17: -/* rule 17 can match eol */ +case 21: +/* rule 21 can match eol */ YY_RULE_SETUP BEGIN(INITIAL); current_file->lineno++; return T_EOL; YY_BREAK -case 18: +case 22: YY_RULE_SETUP /* ignore */ YY_BREAK -case 19: +case 23: YY_RULE_SETUP { const struct kconf_id *id = kconf_id_lookup(zconftext, zconfleng); @@ -1170,27 +1218,35 @@ YY_RULE_SETUP return T_WORD; } YY_BREAK -case 20: +case 24: YY_RULE_SETUP /* comment */ YY_BREAK -case 21: -/* rule 21 can match eol */ +case 25: +/* rule 25 can match eol */ YY_RULE_SETUP current_file->lineno++; YY_BREAK -case 22: +case 26: YY_RULE_SETUP YY_BREAK +case 27: +YY_RULE_SETUP +{ + fprintf(stderr, + "%s:%d:warning: ignoring unsupported character '%c'\n", + zconf_curname(), zconf_lineno(), *zconftext); + } + YY_BREAK case YY_STATE_EOF(PARAM): { BEGIN(INITIAL); } YY_BREAK -case 23: -/* rule 23 can match eol */ +case 28: +/* rule 28 can match eol */ *yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up zconftext again */ @@ -1201,14 +1257,14 @@ YY_RULE_SETUP return T_WORD_QUOTE; } YY_BREAK -case 24: +case 29: YY_RULE_SETUP { append_string(zconftext, zconfleng); } YY_BREAK -case 25: -/* rule 25 can match eol */ +case 30: +/* rule 30 can match eol */ *yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up zconftext again */ @@ -1219,13 +1275,13 @@ YY_RULE_SETUP return T_WORD_QUOTE; } YY_BREAK -case 26: +case 31: YY_RULE_SETUP { append_string(zconftext + 1, zconfleng - 1); } YY_BREAK -case 27: +case 32: YY_RULE_SETUP { if (str == zconftext[0]) { @@ -1236,8 +1292,8 @@ YY_RULE_SETUP append_string(zconftext, 1); } YY_BREAK -case 28: -/* rule 28 can match eol */ +case 33: +/* rule 33 can match eol */ YY_RULE_SETUP { printf("%s:%d:warning: multi-line strings not supported\n", zconf_curname(), zconf_lineno()); @@ -1252,7 +1308,7 @@ case YY_STATE_EOF(STRING): } YY_BREAK -case 29: +case 34: YY_RULE_SETUP { ts = 0; @@ -1277,8 +1333,8 @@ YY_RULE_SETUP } } YY_BREAK -case 30: -/* rule 30 can match eol */ +case 35: +/* rule 35 can match eol */ *yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up zconftext again */ @@ -1289,15 +1345,15 @@ YY_RULE_SETUP return T_HELPTEXT; } YY_BREAK -case 31: -/* rule 31 can match eol */ +case 36: +/* rule 36 can match eol */ YY_RULE_SETUP { current_file->lineno++; append_string("\n", 1); } YY_BREAK -case 32: +case 37: YY_RULE_SETUP { while (zconfleng) { @@ -1328,7 +1384,7 @@ case YY_STATE_EOF(COMMAND): yyterminate(); } YY_BREAK -case 33: +case 38: YY_RULE_SETUP YY_FATAL_ERROR( "flex scanner jammed" ); YY_BREAK diff --git a/scripts/kconfig/zconf.tab.c_shipped b/scripts/kconfig/zconf.tab.c_shipped index de5e84ed3f96..7a4d658c2066 100644 --- a/scripts/kconfig/zconf.tab.c_shipped +++ b/scripts/kconfig/zconf.tab.c_shipped @@ -1,8 +1,8 @@ -/* A Bison parser, made by GNU Bison 2.5. */ +/* A Bison parser, made by GNU Bison 2.5.1. */ /* Bison implementation for Yacc-like parsers in C - Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc. + Copyright (C) 1984, 1989-1990, 2000-2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -44,7 +44,7 @@ #define YYBISON 1 /* Bison version. */ -#define YYBISON_VERSION "2.5" +#define YYBISON_VERSION "2.5.1" /* Skeleton name. */ #define YYSKELETON_NAME "yacc.c" @@ -108,6 +108,14 @@ static struct menu *current_menu, *current_entry; +# ifndef YY_NULL +# if defined __cplusplus && 201103L <= __cplusplus +# define YY_NULL nullptr +# else +# define YY_NULL 0 +# endif +# endif + /* Enabling traces. */ #ifndef YYDEBUG # define YYDEBUG 1 @@ -159,13 +167,17 @@ static struct menu *current_menu, *current_entry; T_WORD = 281, T_WORD_QUOTE = 282, T_UNEQUAL = 283, - T_CLOSE_PAREN = 284, - T_OPEN_PAREN = 285, - T_EOL = 286, - T_OR = 287, - T_AND = 288, - T_EQUAL = 289, - T_NOT = 290 + T_LESS = 284, + T_LESS_EQUAL = 285, + T_GREATER = 286, + T_GREATER_EQUAL = 287, + T_CLOSE_PAREN = 288, + T_OPEN_PAREN = 289, + T_EOL = 290, + T_OR = 291, + T_AND = 292, + T_EQUAL = 293, + T_NOT = 294 }; #endif @@ -304,6 +316,7 @@ YYID (yyi) # if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \ || defined __cplusplus || defined _MSC_VER) # include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ + /* Use EXIT_SUCCESS as a witness for stdlib.h. */ # ifndef EXIT_SUCCESS # define EXIT_SUCCESS 0 # endif @@ -395,20 +408,20 @@ union yyalloc #endif #if defined YYCOPY_NEEDED && YYCOPY_NEEDED -/* Copy COUNT objects from FROM to TO. The source and destination do +/* Copy COUNT objects from SRC to DST. The source and destination do not overlap. */ # ifndef YYCOPY # if defined __GNUC__ && 1 < __GNUC__ -# define YYCOPY(To, From, Count) \ - __builtin_memcpy (To, From, (Count) * sizeof (*(From))) +# define YYCOPY(Dst, Src, Count) \ + __builtin_memcpy (Dst, Src, (Count) * sizeof (*(Src))) # else -# define YYCOPY(To, From, Count) \ - do \ - { \ - YYSIZE_T yyi; \ - for (yyi = 0; yyi < (Count); yyi++) \ - (To)[yyi] = (From)[yyi]; \ - } \ +# define YYCOPY(Dst, Src, Count) \ + do \ + { \ + YYSIZE_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (Dst)[yyi] = (Src)[yyi]; \ + } \ while (YYID (0)) # endif # endif @@ -417,20 +430,20 @@ union yyalloc /* YYFINAL -- State number of the termination state. */ #define YYFINAL 11 /* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 290 +#define YYLAST 298 /* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 36 +#define YYNTOKENS 40 /* YYNNTS -- Number of nonterminals. */ #define YYNNTS 50 /* YYNRULES -- Number of rules. */ -#define YYNRULES 118 +#define YYNRULES 122 /* YYNRULES -- Number of states. */ -#define YYNSTATES 191 +#define YYNSTATES 199 /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ #define YYUNDEFTOK 2 -#define YYMAXUTOK 290 +#define YYMAXUTOK 294 #define YYTRANSLATE(YYX) \ ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) @@ -467,7 +480,7 @@ static const yytype_uint8 yytranslate[] = 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, - 35 + 35, 36, 37, 38, 39 }; #if YYDEBUG @@ -486,64 +499,67 @@ static const yytype_uint16 yyprhs[] = 235, 238, 241, 244, 248, 252, 255, 258, 261, 262, 265, 268, 271, 276, 277, 280, 283, 286, 287, 290, 292, 294, 297, 300, 303, 305, 308, 309, 312, 314, - 318, 322, 326, 329, 333, 337, 339, 341, 342 + 318, 322, 326, 330, 334, 338, 342, 345, 349, 353, + 355, 357, 358 }; /* YYRHS -- A `-1'-separated list of the rules' RHS. */ static const yytype_int8 yyrhs[] = { - 37, 0, -1, 81, 38, -1, 38, -1, 63, 39, - -1, 39, -1, -1, 39, 41, -1, 39, 55, -1, - 39, 67, -1, 39, 80, -1, 39, 26, 1, 31, - -1, 39, 40, 1, 31, -1, 39, 1, 31, -1, + 41, 0, -1, 85, 42, -1, 42, -1, 67, 43, + -1, 43, -1, -1, 43, 45, -1, 43, 59, -1, + 43, 71, -1, 43, 84, -1, 43, 26, 1, 35, + -1, 43, 44, 1, 35, -1, 43, 1, 35, -1, 16, -1, 18, -1, 19, -1, 21, -1, 17, -1, - 22, -1, 20, -1, 23, -1, 31, -1, 61, -1, - 71, -1, 44, -1, 46, -1, 69, -1, 26, 1, - 31, -1, 1, 31, -1, 10, 26, 31, -1, 43, - 47, -1, 11, 26, 31, -1, 45, 47, -1, -1, - 47, 48, -1, 47, 49, -1, 47, 75, -1, 47, - 73, -1, 47, 42, -1, 47, 31, -1, 19, 78, - 31, -1, 18, 79, 82, 31, -1, 20, 83, 82, - 31, -1, 21, 26, 82, 31, -1, 22, 84, 84, - 82, 31, -1, 24, 50, 31, -1, -1, 50, 26, - 51, -1, -1, 34, 79, -1, 7, 85, 31, -1, - 52, 56, -1, 80, -1, 53, 58, 54, -1, -1, - 56, 57, -1, 56, 75, -1, 56, 73, -1, 56, - 31, -1, 56, 42, -1, 18, 79, 82, 31, -1, - 19, 78, 31, -1, 17, 31, -1, 20, 26, 82, - 31, -1, -1, 58, 41, -1, 14, 83, 81, -1, - 80, -1, 59, 62, 60, -1, -1, 62, 41, -1, - 62, 67, -1, 62, 55, -1, 3, 79, 81, -1, - 4, 79, 31, -1, 64, 76, 74, -1, 80, -1, - 65, 68, 66, -1, -1, 68, 41, -1, 68, 67, - -1, 68, 55, -1, 6, 79, 31, -1, 9, 79, - 31, -1, 70, 74, -1, 12, 31, -1, 72, 13, - -1, -1, 74, 75, -1, 74, 31, -1, 74, 42, - -1, 16, 25, 83, 31, -1, -1, 76, 77, -1, - 76, 31, -1, 23, 82, -1, -1, 79, 82, -1, - 26, -1, 27, -1, 5, 31, -1, 8, 31, -1, - 15, 31, -1, 31, -1, 81, 31, -1, -1, 14, - 83, -1, 84, -1, 84, 34, 84, -1, 84, 28, - 84, -1, 30, 83, 29, -1, 35, 83, -1, 83, - 32, 83, -1, 83, 33, 83, -1, 26, -1, 27, - -1, -1, 26, -1 + 22, -1, 20, -1, 23, -1, 35, -1, 65, -1, + 75, -1, 48, -1, 50, -1, 73, -1, 26, 1, + 35, -1, 1, 35, -1, 10, 26, 35, -1, 47, + 51, -1, 11, 26, 35, -1, 49, 51, -1, -1, + 51, 52, -1, 51, 53, -1, 51, 79, -1, 51, + 77, -1, 51, 46, -1, 51, 35, -1, 19, 82, + 35, -1, 18, 83, 86, 35, -1, 20, 87, 86, + 35, -1, 21, 26, 86, 35, -1, 22, 88, 88, + 86, 35, -1, 24, 54, 35, -1, -1, 54, 26, + 55, -1, -1, 38, 83, -1, 7, 89, 35, -1, + 56, 60, -1, 84, -1, 57, 62, 58, -1, -1, + 60, 61, -1, 60, 79, -1, 60, 77, -1, 60, + 35, -1, 60, 46, -1, 18, 83, 86, 35, -1, + 19, 82, 35, -1, 17, 35, -1, 20, 26, 86, + 35, -1, -1, 62, 45, -1, 14, 87, 85, -1, + 84, -1, 63, 66, 64, -1, -1, 66, 45, -1, + 66, 71, -1, 66, 59, -1, 3, 83, 85, -1, + 4, 83, 35, -1, 68, 80, 78, -1, 84, -1, + 69, 72, 70, -1, -1, 72, 45, -1, 72, 71, + -1, 72, 59, -1, 6, 83, 35, -1, 9, 83, + 35, -1, 74, 78, -1, 12, 35, -1, 76, 13, + -1, -1, 78, 79, -1, 78, 35, -1, 78, 46, + -1, 16, 25, 87, 35, -1, -1, 80, 81, -1, + 80, 35, -1, 23, 86, -1, -1, 83, 86, -1, + 26, -1, 27, -1, 5, 35, -1, 8, 35, -1, + 15, 35, -1, 35, -1, 85, 35, -1, -1, 14, + 87, -1, 88, -1, 88, 29, 88, -1, 88, 30, + 88, -1, 88, 31, 88, -1, 88, 32, 88, -1, + 88, 38, 88, -1, 88, 28, 88, -1, 34, 87, + 33, -1, 39, 87, -1, 87, 36, 87, -1, 87, + 37, 87, -1, 26, -1, 27, -1, -1, 26, -1 }; /* YYRLINE[YYN] -- source line where rule number YYN was defined. */ static const yytype_uint16 yyrline[] = { - 0, 103, 103, 103, 105, 105, 107, 109, 110, 111, - 112, 113, 114, 118, 122, 122, 122, 122, 122, 122, - 122, 122, 126, 127, 128, 129, 130, 131, 135, 136, - 142, 150, 156, 164, 174, 176, 177, 178, 179, 180, - 181, 184, 192, 198, 208, 214, 220, 223, 225, 236, - 237, 242, 251, 256, 264, 267, 269, 270, 271, 272, - 273, 276, 282, 293, 299, 309, 311, 316, 324, 332, - 335, 337, 338, 339, 344, 351, 358, 363, 371, 374, - 376, 377, 378, 381, 389, 396, 403, 409, 416, 418, - 419, 420, 423, 431, 433, 434, 437, 444, 446, 451, - 452, 455, 456, 457, 461, 462, 465, 466, 469, 470, - 471, 472, 473, 474, 475, 478, 479, 482, 483 + 0, 108, 108, 108, 110, 110, 112, 114, 115, 116, + 117, 118, 119, 123, 127, 127, 127, 127, 127, 127, + 127, 127, 131, 132, 133, 134, 135, 136, 140, 141, + 147, 155, 161, 169, 179, 181, 182, 183, 184, 185, + 186, 189, 197, 203, 213, 219, 225, 228, 230, 241, + 242, 247, 256, 261, 269, 272, 274, 275, 276, 277, + 278, 281, 287, 298, 304, 314, 316, 321, 329, 337, + 340, 342, 343, 344, 349, 356, 363, 368, 376, 379, + 381, 382, 383, 386, 394, 401, 408, 414, 421, 423, + 424, 425, 428, 436, 438, 439, 442, 449, 451, 456, + 457, 460, 461, 462, 466, 467, 470, 471, 474, 475, + 476, 477, 478, 479, 480, 481, 482, 483, 484, 487, + 488, 491, 492 }; #endif @@ -557,6 +573,7 @@ static const char *const yytname[] = "T_MENUCONFIG", "T_HELP", "T_HELPTEXT", "T_IF", "T_ENDIF", "T_DEPENDS", "T_OPTIONAL", "T_PROMPT", "T_TYPE", "T_DEFAULT", "T_SELECT", "T_RANGE", "T_VISIBLE", "T_OPTION", "T_ON", "T_WORD", "T_WORD_QUOTE", "T_UNEQUAL", + "T_LESS", "T_LESS_EQUAL", "T_GREATER", "T_GREATER_EQUAL", "T_CLOSE_PAREN", "T_OPEN_PAREN", "T_EOL", "T_OR", "T_AND", "T_EQUAL", "T_NOT", "$accept", "input", "start", "stmt_list", "option_name", "common_stmt", "option_error", "config_entry_start", "config_stmt", @@ -568,7 +585,7 @@ static const char *const yytname[] = "menu_entry", "menu_end", "menu_stmt", "menu_block", "source_stmt", "comment", "comment_stmt", "help_start", "help", "depends_list", "depends", "visibility_list", "visible", "prompt_stmt_opt", "prompt", - "end", "nl", "if_expr", "expr", "symbol", "word_opt", 0 + "end", "nl", "if_expr", "expr", "symbol", "word_opt", YY_NULL }; #endif @@ -580,25 +597,26 @@ static const yytype_uint16 yytoknum[] = 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, - 285, 286, 287, 288, 289, 290 + 285, 286, 287, 288, 289, 290, 291, 292, 293, 294 }; # endif /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ static const yytype_uint8 yyr1[] = { - 0, 36, 37, 37, 38, 38, 39, 39, 39, 39, - 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, - 40, 40, 41, 41, 41, 41, 41, 41, 42, 42, - 43, 44, 45, 46, 47, 47, 47, 47, 47, 47, - 47, 48, 48, 48, 48, 48, 49, 50, 50, 51, - 51, 52, 53, 54, 55, 56, 56, 56, 56, 56, - 56, 57, 57, 57, 57, 58, 58, 59, 60, 61, - 62, 62, 62, 62, 63, 64, 65, 66, 67, 68, - 68, 68, 68, 69, 70, 71, 72, 73, 74, 74, - 74, 74, 75, 76, 76, 76, 77, 78, 78, 79, - 79, 80, 80, 80, 81, 81, 82, 82, 83, 83, - 83, 83, 83, 83, 83, 84, 84, 85, 85 + 0, 40, 41, 41, 42, 42, 43, 43, 43, 43, + 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, + 44, 44, 45, 45, 45, 45, 45, 45, 46, 46, + 47, 48, 49, 50, 51, 51, 51, 51, 51, 51, + 51, 52, 52, 52, 52, 52, 53, 54, 54, 55, + 55, 56, 57, 58, 59, 60, 60, 60, 60, 60, + 60, 61, 61, 61, 61, 62, 62, 63, 64, 65, + 66, 66, 66, 66, 67, 68, 69, 70, 71, 72, + 72, 72, 72, 73, 74, 75, 76, 77, 78, 78, + 78, 78, 79, 80, 80, 80, 81, 82, 82, 83, + 83, 84, 84, 84, 85, 85, 86, 86, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, + 88, 89, 89 }; /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ @@ -615,7 +633,8 @@ static const yytype_uint8 yyr2[] = 2, 2, 2, 3, 3, 2, 2, 2, 0, 2, 2, 2, 4, 0, 2, 2, 2, 0, 2, 1, 1, 2, 2, 2, 1, 2, 0, 2, 1, 3, - 3, 3, 2, 3, 3, 1, 1, 0, 1 + 3, 3, 3, 3, 3, 3, 2, 3, 3, 1, + 1, 0, 1 }; /* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM. @@ -624,72 +643,72 @@ static const yytype_uint8 yyr2[] = static const yytype_uint8 yydefact[] = { 6, 0, 104, 0, 3, 0, 6, 6, 99, 100, - 0, 1, 0, 0, 0, 0, 117, 0, 0, 0, + 0, 1, 0, 0, 0, 0, 121, 0, 0, 0, 0, 0, 0, 14, 18, 15, 16, 20, 17, 19, 21, 0, 22, 0, 7, 34, 25, 34, 26, 55, 65, 8, 70, 23, 93, 79, 9, 27, 88, 24, - 10, 0, 105, 2, 74, 13, 0, 101, 0, 118, - 0, 102, 0, 0, 0, 115, 116, 0, 0, 0, + 10, 0, 105, 2, 74, 13, 0, 101, 0, 122, + 0, 102, 0, 0, 0, 119, 120, 0, 0, 0, 108, 103, 0, 0, 0, 0, 0, 0, 0, 88, - 0, 0, 75, 83, 51, 84, 30, 32, 0, 112, - 0, 0, 67, 0, 0, 11, 12, 0, 0, 0, - 0, 97, 0, 0, 0, 47, 0, 40, 39, 35, - 36, 0, 38, 37, 0, 0, 97, 0, 59, 60, - 56, 58, 57, 66, 54, 53, 71, 73, 69, 72, - 68, 106, 95, 0, 94, 80, 82, 78, 81, 77, - 90, 91, 89, 111, 113, 114, 110, 109, 29, 86, - 0, 106, 0, 106, 106, 106, 0, 0, 0, 87, - 63, 106, 0, 106, 0, 96, 0, 0, 41, 98, - 0, 0, 106, 49, 46, 28, 0, 62, 0, 107, - 92, 42, 43, 44, 0, 0, 48, 61, 64, 45, - 50 + 0, 0, 75, 83, 51, 84, 30, 32, 0, 116, + 0, 0, 67, 0, 0, 0, 0, 0, 0, 11, + 12, 0, 0, 0, 0, 97, 0, 0, 0, 47, + 0, 40, 39, 35, 36, 0, 38, 37, 0, 0, + 97, 0, 59, 60, 56, 58, 57, 66, 54, 53, + 71, 73, 69, 72, 68, 106, 95, 0, 94, 80, + 82, 78, 81, 77, 90, 91, 89, 115, 117, 118, + 114, 109, 110, 111, 112, 113, 29, 86, 0, 106, + 0, 106, 106, 106, 0, 0, 0, 87, 63, 106, + 0, 106, 0, 96, 0, 0, 41, 98, 0, 0, + 106, 49, 46, 28, 0, 62, 0, 107, 92, 42, + 43, 44, 0, 0, 48, 61, 64, 45, 50 }; /* YYDEFGOTO[NTERM-NUM]. */ static const yytype_int16 yydefgoto[] = { - -1, 3, 4, 5, 33, 34, 108, 35, 36, 37, - 38, 74, 109, 110, 157, 186, 39, 40, 124, 41, - 76, 120, 77, 42, 128, 43, 78, 6, 44, 45, - 137, 46, 80, 47, 48, 49, 111, 112, 81, 113, - 79, 134, 152, 153, 50, 7, 165, 69, 70, 60 + -1, 3, 4, 5, 33, 34, 112, 35, 36, 37, + 38, 74, 113, 114, 165, 194, 39, 40, 128, 41, + 76, 124, 77, 42, 132, 43, 78, 6, 44, 45, + 141, 46, 80, 47, 48, 49, 115, 116, 81, 117, + 79, 138, 160, 161, 50, 7, 173, 69, 70, 60 }; /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing STATE-NUM. */ -#define YYPACT_NINF -90 +#define YYPACT_NINF -91 static const yytype_int16 yypact[] = { - 4, 42, -90, 96, -90, 111, -90, 15, -90, -90, - 75, -90, 82, 42, 104, 42, 110, 107, 42, 115, - 125, -4, 121, -90, -90, -90, -90, -90, -90, -90, - -90, 162, -90, 163, -90, -90, -90, -90, -90, -90, - -90, -90, -90, -90, -90, -90, -90, -90, -90, -90, - -90, 139, -90, -90, 138, -90, 142, -90, 143, -90, - 152, -90, 164, 167, 168, -90, -90, -4, -4, 77, - -18, -90, 177, 185, 33, 71, 195, 247, 236, -2, - 236, 171, -90, -90, -90, -90, -90, -90, 41, -90, - -4, -4, 138, 97, 97, -90, -90, 186, 187, 194, - 42, 42, -4, 196, 97, -90, 219, -90, -90, -90, - -90, 210, -90, -90, 204, 42, 42, 199, -90, -90, - -90, -90, -90, -90, -90, -90, -90, -90, -90, -90, - -90, 222, -90, 223, -90, -90, -90, -90, -90, -90, - -90, -90, -90, -90, 215, -90, -90, -90, -90, -90, - -4, 222, 228, 222, -5, 222, 97, 35, 229, -90, - -90, 222, 232, 222, -4, -90, 135, 233, -90, -90, - 234, 235, 222, 240, -90, -90, 237, -90, 239, -13, - -90, -90, -90, -90, 244, 42, -90, -90, -90, -90, - -90 + 19, 37, -91, 13, -91, 79, -91, 20, -91, -91, + -16, -91, 21, 37, 25, 37, 41, 36, 37, 78, + 83, 31, 56, -91, -91, -91, -91, -91, -91, -91, + -91, 116, -91, 127, -91, -91, -91, -91, -91, -91, + -91, -91, -91, -91, -91, -91, -91, -91, -91, -91, + -91, 147, -91, -91, 105, -91, 109, -91, 111, -91, + 114, -91, 136, 137, 142, -91, -91, 31, 31, 76, + 254, -91, 143, 146, 27, 115, 207, 258, 243, -14, + 243, 179, -91, -91, -91, -91, -91, -91, -7, -91, + 31, 31, 105, 51, 51, 51, 51, 51, 51, -91, + -91, 156, 168, 181, 37, 37, 31, 178, 51, -91, + 206, -91, -91, -91, -91, 196, -91, -91, 175, 37, + 37, 185, -91, -91, -91, -91, -91, -91, -91, -91, + -91, -91, -91, -91, -91, 214, -91, 230, -91, -91, + -91, -91, -91, -91, -91, -91, -91, -91, 183, -91, + -91, -91, -91, -91, -91, -91, -91, -91, 31, 214, + 194, 214, 45, 214, 51, 26, 195, -91, -91, 214, + 197, 214, 31, -91, 139, 208, -91, -91, 220, 224, + 214, 222, -91, -91, 226, -91, 227, 123, -91, -91, + -91, -91, 235, 37, -91, -91, -91, -91, -91 }; /* YYPGOTO[NTERM-NUM]. */ static const yytype_int16 yypgoto[] = { - -90, -90, 269, 271, -90, 23, -70, -90, -90, -90, - -90, 243, -90, -90, -90, -90, -90, -90, -90, -48, - -90, -90, -90, -90, -90, -90, -90, -90, -90, -90, - -90, -20, -90, -90, -90, -90, -90, 206, 205, -68, - -90, -90, 169, -1, 27, -7, 118, -66, -89, -90 + -91, -91, 264, 268, -91, 30, -65, -91, -91, -91, + -91, 238, -91, -91, -91, -91, -91, -91, -91, -12, + -91, -91, -91, -91, -91, -91, -91, -91, -91, -91, + -91, -5, -91, -91, -91, -91, -91, 200, 209, -61, + -91, -91, 170, -1, 65, 0, 118, -66, -90, -91 }; /* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If @@ -698,102 +717,102 @@ static const yytype_int16 yypgoto[] = #define YYTABLE_NINF -86 static const yytype_int16 yytable[] = { - 10, 88, 89, 54, 146, 147, 119, 1, 122, 164, - 93, 141, 56, 142, 58, 156, 94, 62, 1, 90, - 91, 131, 65, 66, 144, 145, 67, 90, 91, 132, - 127, 68, 136, -31, 97, 2, 154, -31, -31, -31, - -31, -31, -31, -31, -31, 98, 52, -31, -31, 99, - -31, 100, 101, 102, 103, 104, -31, 105, 129, 106, - 138, 173, 92, 141, 107, 142, 174, 172, 8, 9, - 143, -33, 97, 90, 91, -33, -33, -33, -33, -33, - -33, -33, -33, 98, 166, -33, -33, 99, -33, 100, - 101, 102, 103, 104, -33, 105, 11, 106, 179, 151, - 123, 126, 107, 135, 125, 130, 2, 139, 2, 90, - 91, -5, 12, 55, 161, 13, 14, 15, 16, 17, - 18, 19, 20, 65, 66, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 57, 59, 31, 61, -4, - 12, 63, 32, 13, 14, 15, 16, 17, 18, 19, - 20, 64, 71, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 72, 73, 31, 180, 90, 91, 52, - 32, -85, 97, 82, 83, -85, -85, -85, -85, -85, - -85, -85, -85, 84, 190, -85, -85, 99, -85, -85, - -85, -85, -85, -85, -85, 85, 97, 106, 86, 87, - -52, -52, 140, -52, -52, -52, -52, 98, 95, -52, - -52, 99, 114, 115, 116, 117, 96, 148, 149, 150, - 158, 106, 155, 159, 97, 163, 118, -76, -76, -76, - -76, -76, -76, -76, -76, 160, 164, -76, -76, 99, - 13, 14, 15, 16, 17, 18, 19, 20, 91, 106, - 21, 22, 14, 15, 140, 17, 18, 19, 20, 168, - 175, 21, 22, 177, 181, 182, 183, 32, 187, 167, - 188, 169, 170, 171, 185, 189, 53, 51, 32, 176, - 75, 178, 121, 0, 133, 162, 0, 0, 0, 0, - 184 + 10, 88, 89, 150, 151, 152, 153, 154, 155, 135, + 54, 123, 56, 11, 58, 126, 145, 62, 164, 2, + 146, 136, 1, 1, 148, 149, 147, -31, 101, 90, + 91, -31, -31, -31, -31, -31, -31, -31, -31, 102, + 162, -31, -31, 103, -31, 104, 105, 106, 107, 108, + -31, 109, 181, 110, 2, 52, 55, 65, 66, 172, + 57, 182, 111, 8, 9, 67, 131, 59, 140, 92, + 68, 61, 145, 133, 180, 142, 146, 65, 66, -5, + 12, 90, 91, 13, 14, 15, 16, 17, 18, 19, + 20, 71, 174, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 159, 63, 31, 187, 127, 130, 64, + 139, 2, 90, 91, 32, -33, 101, 72, 169, -33, + -33, -33, -33, -33, -33, -33, -33, 102, 73, -33, + -33, 103, -33, 104, 105, 106, 107, 108, -33, 109, + 52, 110, 129, 134, 82, 143, 83, -4, 12, 84, + 111, 13, 14, 15, 16, 17, 18, 19, 20, 90, + 91, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 85, 86, 31, 188, 90, 91, 87, 99, -85, + 101, 100, 32, -85, -85, -85, -85, -85, -85, -85, + -85, 156, 198, -85, -85, 103, -85, -85, -85, -85, + -85, -85, -85, 157, 163, 110, 158, 166, 101, 167, + 168, 171, -52, -52, 144, -52, -52, -52, -52, 102, + 91, -52, -52, 103, 118, 119, 120, 121, 172, 176, + 183, 101, 185, 110, -76, -76, -76, -76, -76, -76, + -76, -76, 122, 189, -76, -76, 103, 13, 14, 15, + 16, 17, 18, 19, 20, 190, 110, 21, 22, 191, + 193, 195, 196, 14, 15, 144, 17, 18, 19, 20, + 197, 53, 21, 22, 51, 75, 125, 175, 32, 177, + 178, 179, 93, 94, 95, 96, 97, 184, 137, 186, + 170, 0, 98, 32, 0, 0, 0, 0, 192 }; #define yypact_value_is_default(yystate) \ - ((yystate) == (-90)) + ((yystate) == (-91)) #define yytable_value_is_error(yytable_value) \ YYID (0) static const yytype_int16 yycheck[] = { - 1, 67, 68, 10, 93, 94, 76, 3, 76, 14, - 28, 81, 13, 81, 15, 104, 34, 18, 3, 32, - 33, 23, 26, 27, 90, 91, 30, 32, 33, 31, - 78, 35, 80, 0, 1, 31, 102, 4, 5, 6, - 7, 8, 9, 10, 11, 12, 31, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 78, 26, - 80, 26, 69, 133, 31, 133, 31, 156, 26, 27, - 29, 0, 1, 32, 33, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 150, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 0, 26, 164, 100, - 77, 78, 31, 80, 77, 78, 31, 80, 31, 32, - 33, 0, 1, 31, 115, 4, 5, 6, 7, 8, - 9, 10, 11, 26, 27, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 31, 26, 26, 31, 0, - 1, 26, 31, 4, 5, 6, 7, 8, 9, 10, - 11, 26, 31, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 1, 1, 26, 31, 32, 33, 31, - 31, 0, 1, 31, 31, 4, 5, 6, 7, 8, - 9, 10, 11, 31, 185, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 31, 1, 26, 31, 31, - 5, 6, 31, 8, 9, 10, 11, 12, 31, 14, - 15, 16, 17, 18, 19, 20, 31, 31, 31, 25, - 1, 26, 26, 13, 1, 26, 31, 4, 5, 6, - 7, 8, 9, 10, 11, 31, 14, 14, 15, 16, - 4, 5, 6, 7, 8, 9, 10, 11, 33, 26, - 14, 15, 5, 6, 31, 8, 9, 10, 11, 31, - 31, 14, 15, 31, 31, 31, 31, 31, 31, 151, - 31, 153, 154, 155, 34, 31, 7, 6, 31, 161, - 37, 163, 76, -1, 79, 116, -1, -1, -1, -1, - 172 + 1, 67, 68, 93, 94, 95, 96, 97, 98, 23, + 10, 76, 13, 0, 15, 76, 81, 18, 108, 35, + 81, 35, 3, 3, 90, 91, 33, 0, 1, 36, + 37, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 106, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 26, 26, 35, 35, 35, 26, 27, 14, + 35, 35, 35, 26, 27, 34, 78, 26, 80, 69, + 39, 35, 137, 78, 164, 80, 137, 26, 27, 0, + 1, 36, 37, 4, 5, 6, 7, 8, 9, 10, + 11, 35, 158, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 104, 26, 26, 172, 77, 78, 26, + 80, 35, 36, 37, 35, 0, 1, 1, 119, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 1, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 35, 26, 77, 78, 35, 80, 35, 0, 1, 35, + 35, 4, 5, 6, 7, 8, 9, 10, 11, 36, + 37, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 35, 35, 26, 35, 36, 37, 35, 35, 0, + 1, 35, 35, 4, 5, 6, 7, 8, 9, 10, + 11, 35, 193, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 35, 26, 26, 25, 1, 1, 13, + 35, 26, 5, 6, 35, 8, 9, 10, 11, 12, + 37, 14, 15, 16, 17, 18, 19, 20, 14, 35, + 35, 1, 35, 26, 4, 5, 6, 7, 8, 9, + 10, 11, 35, 35, 14, 15, 16, 4, 5, 6, + 7, 8, 9, 10, 11, 35, 26, 14, 15, 35, + 38, 35, 35, 5, 6, 35, 8, 9, 10, 11, + 35, 7, 14, 15, 6, 37, 76, 159, 35, 161, + 162, 163, 28, 29, 30, 31, 32, 169, 79, 171, + 120, -1, 38, 35, -1, -1, -1, -1, 180 }; /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing symbol of state STATE-NUM. */ static const yytype_uint8 yystos[] = { - 0, 3, 31, 37, 38, 39, 63, 81, 26, 27, - 79, 0, 1, 4, 5, 6, 7, 8, 9, 10, + 0, 3, 35, 41, 42, 43, 67, 85, 26, 27, + 83, 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, - 23, 26, 31, 40, 41, 43, 44, 45, 46, 52, - 53, 55, 59, 61, 64, 65, 67, 69, 70, 71, - 80, 39, 31, 38, 81, 31, 79, 31, 79, 26, - 85, 31, 79, 26, 26, 26, 27, 30, 35, 83, - 84, 31, 1, 1, 47, 47, 56, 58, 62, 76, - 68, 74, 31, 31, 31, 31, 31, 31, 83, 83, - 32, 33, 81, 28, 34, 31, 31, 1, 12, 16, - 18, 19, 20, 21, 22, 24, 26, 31, 42, 48, - 49, 72, 73, 75, 17, 18, 19, 20, 31, 42, - 57, 73, 75, 41, 54, 80, 41, 55, 60, 67, - 80, 23, 31, 74, 77, 41, 55, 66, 67, 80, - 31, 42, 75, 29, 83, 83, 84, 84, 31, 31, - 25, 79, 78, 79, 83, 26, 84, 50, 1, 13, - 31, 79, 78, 26, 14, 82, 83, 82, 31, 82, - 82, 82, 84, 26, 31, 31, 82, 31, 82, 83, - 31, 31, 31, 31, 82, 34, 51, 31, 31, 31, - 79 + 23, 26, 35, 44, 45, 47, 48, 49, 50, 56, + 57, 59, 63, 65, 68, 69, 71, 73, 74, 75, + 84, 43, 35, 42, 85, 35, 83, 35, 83, 26, + 89, 35, 83, 26, 26, 26, 27, 34, 39, 87, + 88, 35, 1, 1, 51, 51, 60, 62, 66, 80, + 72, 78, 35, 35, 35, 35, 35, 35, 87, 87, + 36, 37, 85, 28, 29, 30, 31, 32, 38, 35, + 35, 1, 12, 16, 18, 19, 20, 21, 22, 24, + 26, 35, 46, 52, 53, 76, 77, 79, 17, 18, + 19, 20, 35, 46, 61, 77, 79, 45, 58, 84, + 45, 59, 64, 71, 84, 23, 35, 78, 81, 45, + 59, 70, 71, 84, 35, 46, 79, 33, 87, 87, + 88, 88, 88, 88, 88, 88, 35, 35, 25, 83, + 82, 83, 87, 26, 88, 54, 1, 13, 35, 83, + 82, 26, 14, 86, 87, 86, 35, 86, 86, 86, + 88, 26, 35, 35, 86, 35, 86, 87, 35, 35, + 35, 35, 86, 38, 55, 35, 35, 35, 83 }; #define yyerrok (yyerrstatus = 0) @@ -823,17 +842,18 @@ static const yytype_uint8 yystos[] = #define YYRECOVERING() (!!yyerrstatus) -#define YYBACKUP(Token, Value) \ -do \ - if (yychar == YYEMPTY && yylen == 1) \ - { \ - yychar = (Token); \ - yylval = (Value); \ - YYPOPSTACK (1); \ - goto yybackup; \ - } \ - else \ - { \ +#define YYBACKUP(Token, Value) \ +do \ + if (yychar == YYEMPTY) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + YYPOPSTACK (yylen); \ + yystate = *yyssp; \ + goto yybackup; \ + } \ + else \ + { \ yyerror (YY_("syntax error: cannot back up")); \ YYERROR; \ } \ @@ -928,6 +948,8 @@ yy_symbol_value_print (yyoutput, yytype, yyvaluep) YYSTYPE const * const yyvaluep; #endif { + FILE *yyo = yyoutput; + YYUSE (yyo); if (!yyvaluep) return; # ifdef YYPRINT @@ -1179,12 +1201,12 @@ static int yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, yytype_int16 *yyssp, int yytoken) { - YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]); + YYSIZE_T yysize0 = yytnamerr (YY_NULL, yytname[yytoken]); YYSIZE_T yysize = yysize0; YYSIZE_T yysize1; enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; /* Internationalized format string. */ - const char *yyformat = 0; + const char *yyformat = YY_NULL; /* Arguments of yyformat. */ char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; /* Number of reported tokens (one for the "unexpected", one per @@ -1244,7 +1266,7 @@ yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, break; } yyarg[yycount++] = yytname[yyx]; - yysize1 = yysize + yytnamerr (0, yytname[yyx]); + yysize1 = yysize + yytnamerr (YY_NULL, yytname[yyx]); if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) return 2; @@ -1329,7 +1351,7 @@ yydestruct (yymsg, yytype, yyvaluep) switch (yytype) { - case 53: /* "choice_entry" */ + case 57: /* "choice_entry" */ { fprintf(stderr, "%s:%d: missing end statement for this entry\n", @@ -1339,7 +1361,7 @@ yydestruct (yymsg, yytype, yyvaluep) }; break; - case 59: /* "if_entry" */ + case 63: /* "if_entry" */ { fprintf(stderr, "%s:%d: missing end statement for this entry\n", @@ -1349,7 +1371,7 @@ yydestruct (yymsg, yytype, yyvaluep) }; break; - case 65: /* "menu_entry" */ + case 69: /* "menu_entry" */ { fprintf(stderr, "%s:%d: missing end statement for this entry\n", @@ -1426,7 +1448,7 @@ yyparse () `yyss': related to states. `yyvs': related to semantic values. - Refer to the stacks thru separate pointers, to allow yyoverflow + Refer to the stacks through separate pointers, to allow yyoverflow to reallocate them elsewhere. */ /* The state stack. */ @@ -2012,46 +2034,66 @@ yyreduce: case 109: - { (yyval.expr) = expr_alloc_comp(E_EQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } + { (yyval.expr) = expr_alloc_comp(E_LTH, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 110: - { (yyval.expr) = expr_alloc_comp(E_UNEQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } + { (yyval.expr) = expr_alloc_comp(E_LEQ, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 111: - { (yyval.expr) = (yyvsp[(2) - (3)].expr); } + { (yyval.expr) = expr_alloc_comp(E_GTH, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 112: - { (yyval.expr) = expr_alloc_one(E_NOT, (yyvsp[(2) - (2)].expr)); } + { (yyval.expr) = expr_alloc_comp(E_GEQ, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 113: - { (yyval.expr) = expr_alloc_two(E_OR, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); } + { (yyval.expr) = expr_alloc_comp(E_EQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 114: - { (yyval.expr) = expr_alloc_two(E_AND, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); } + { (yyval.expr) = expr_alloc_comp(E_UNEQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 115: - { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), 0); free((yyvsp[(1) - (1)].string)); } + { (yyval.expr) = (yyvsp[(2) - (3)].expr); } break; case 116: - { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), SYMBOL_CONST); free((yyvsp[(1) - (1)].string)); } + { (yyval.expr) = expr_alloc_one(E_NOT, (yyvsp[(2) - (2)].expr)); } break; case 117: + { (yyval.expr) = expr_alloc_two(E_OR, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); } + break; + + case 118: + + { (yyval.expr) = expr_alloc_two(E_AND, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); } + break; + + case 119: + + { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), 0); free((yyvsp[(1) - (1)].string)); } + break; + + case 120: + + { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), SYMBOL_CONST); free((yyvsp[(1) - (1)].string)); } + break; + + case 121: + { (yyval.string) = NULL; } break; @@ -2243,7 +2285,7 @@ yyabortlab: yyresult = 1; goto yyreturn; -#if !defined(yyoverflow) || YYERROR_VERBOSE +#if !defined yyoverflow || YYERROR_VERBOSE /*-------------------------------------------------. | yyexhaustedlab -- memory exhaustion comes here. | `-------------------------------------------------*/ diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y index 0f683cfa53e9..71bf8bff696a 100644 --- a/scripts/kconfig/zconf.y +++ b/scripts/kconfig/zconf.y @@ -69,6 +69,10 @@ static struct menu *current_menu, *current_entry; %token <string> T_WORD %token <string> T_WORD_QUOTE %token T_UNEQUAL +%token T_LESS +%token T_LESS_EQUAL +%token T_GREATER +%token T_GREATER_EQUAL %token T_CLOSE_PAREN %token T_OPEN_PAREN %token T_EOL @@ -76,6 +80,7 @@ static struct menu *current_menu, *current_entry; %left T_OR %left T_AND %left T_EQUAL T_UNEQUAL +%left T_LESS T_LESS_EQUAL T_GREATER T_GREATER_EQUAL %nonassoc T_NOT %type <string> prompt @@ -467,6 +472,10 @@ if_expr: /* empty */ { $$ = NULL; } ; expr: symbol { $$ = expr_alloc_symbol($1); } + | symbol T_LESS symbol { $$ = expr_alloc_comp(E_LTH, $1, $3); } + | symbol T_LESS_EQUAL symbol { $$ = expr_alloc_comp(E_LEQ, $1, $3); } + | symbol T_GREATER symbol { $$ = expr_alloc_comp(E_GTH, $1, $3); } + | symbol T_GREATER_EQUAL symbol { $$ = expr_alloc_comp(E_GEQ, $1, $3); } | symbol T_EQUAL symbol { $$ = expr_alloc_comp(E_EQUAL, $1, $3); } | symbol T_UNEQUAL symbol { $$ = expr_alloc_comp(E_UNEQUAL, $1, $3); } | T_OPEN_PAREN expr T_CLOSE_PAREN { $$ = $2; } diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 86a4fe75f453..1a10d8ac8162 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -82,7 +82,7 @@ kallsyms() kallsymopt="${kallsymopt} --all-symbols" fi - if [ -n "${CONFIG_ARM}" ] && [ -n "${CONFIG_PAGE_OFFSET}" ]; then + if [ -n "${CONFIG_ARM}" ] && [ -z "${CONFIG_XIP_KERNEL}" ] && [ -n "${CONFIG_PAGE_OFFSET}" ]; then kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET" fi @@ -111,7 +111,6 @@ sortextable() } # Delete output files in case of error -trap cleanup SIGHUP SIGINT SIGQUIT SIGTERM ERR cleanup() { rm -f .old_version @@ -124,6 +123,20 @@ cleanup() rm -f vmlinux.o } +on_exit() +{ + if [ $? -ne 0 ]; then + cleanup + fi +} +trap on_exit EXIT + +on_signals() +{ + exit 1 +} +trap on_signals HUP INT QUIT TERM + # # # Use "make V=1" to debug this script @@ -231,7 +244,6 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then if ! cmp -s System.map .tmp_System.map; then echo >&2 Inconsistent kallsyms data echo >&2 Try "make KALLSYMS_EXTRA_PASS=1" as a workaround - cleanup exit 1 fi fi diff --git a/scripts/tags.sh b/scripts/tags.sh index cdb491d84503..c0a932dff329 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -154,7 +154,7 @@ exuberant() { all_target_sources | xargs $1 -a \ -I __initdata,__exitdata,__initconst, \ - -I __cpuinitdata,__initdata_memblock \ + -I __initdata_memblock \ -I __refdata,__attribute,__maybe_unused,__always_unused \ -I __acquires,__releases,__deprecated \ -I __read_mostly,__aligned,____cacheline_aligned \ diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index 5da129e10aa2..326e826a5d20 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -127,7 +127,7 @@ int acpi_getopt(int argc, char **argv, char *opts) argv[acpi_gbl_optind][0] != '-' || argv[acpi_gbl_optind][1] == '\0') { return (ACPI_OPT_END); - } else if (ACPI_STRCMP(argv[acpi_gbl_optind], "--") == 0) { + } else if (strcmp(argv[acpi_gbl_optind], "--") == 0) { acpi_gbl_optind++; return (ACPI_OPT_END); } @@ -140,7 +140,7 @@ int acpi_getopt(int argc, char **argv, char *opts) /* Make sure that the option is legal */ if (current_char == ':' || - (opts_ptr = ACPI_STRCHR(opts, current_char)) == NULL) { + (opts_ptr = strchr(opts, current_char)) == NULL) { ACPI_OPTION_ERROR("Illegal option: -", current_char); if (argv[acpi_gbl_optind][++current_char_ptr] == '\0') { diff --git a/tools/power/acpi/man/acpidump.8 b/tools/power/acpi/man/acpidump.8 index 38f095d86b52..79e2d1d435d1 100644 --- a/tools/power/acpi/man/acpidump.8 +++ b/tools/power/acpi/man/acpidump.8 @@ -22,9 +22,6 @@ acpidump options are as follow: .B \-b Dump tables to binary files .TP -.B \-c -Dump customized tables -.TP .B \-h \-? This help message .TP @@ -48,15 +45,25 @@ Verbose mode .B \-a <Address> Get table via a physical address .TP +.B \-c <on|off> +Turning on/off customized table dumping +.TP .B \-f <BinaryFile> Get table via a binary file .TP .B \-n <Signature> Get table via a name/signature .TP -Invocation without parameters dumps all available tables +.B \-x +Do not use but dump XSDT +.TP +.B \-x \-x +Do not use or dump XSDT +.TP +.fi +Invocation without parameters dumps all available tables. .TP -Multiple mixed instances of -a, -f, and -n are supported +Multiple mixed instances of -a, -f, and -n are supported. .SH EXAMPLES diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index db15c9d2049e..dd5008b0617a 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -222,7 +222,7 @@ acpi_os_get_table_by_address(acpi_physical_address address, goto exit; } - ACPI_MEMCPY(local_table, mapped_table, table_length); + memcpy(local_table, mapped_table, table_length); exit: osl_unmap_table(mapped_table); @@ -531,7 +531,7 @@ static acpi_status osl_load_rsdp(void) gbl_rsdp_address = rsdp_base + (ACPI_CAST8(mapped_table) - rsdp_address); - ACPI_MEMCPY(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp)); + memcpy(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp)); acpi_os_unmap_memory(rsdp_address, rsdp_size); return (AE_OK); @@ -582,64 +582,67 @@ static acpi_status osl_table_initialize(void) return (AE_OK); } - /* Get RSDP from memory */ + if (!gbl_dump_customized_tables) { - status = osl_load_rsdp(); - if (ACPI_FAILURE(status)) { - return (status); - } + /* Get RSDP from memory */ + + status = osl_load_rsdp(); + if (ACPI_FAILURE(status)) { + return (status); + } - /* Get XSDT from memory */ + /* Get XSDT from memory */ - if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) { - if (gbl_xsdt) { - free(gbl_xsdt); - gbl_xsdt = NULL; + if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) { + if (gbl_xsdt) { + free(gbl_xsdt); + gbl_xsdt = NULL; + } + + gbl_revision = 2; + status = osl_get_bios_table(ACPI_SIG_XSDT, 0, + ACPI_CAST_PTR(struct + acpi_table_header + *, &gbl_xsdt), + &address); + if (ACPI_FAILURE(status)) { + return (status); + } } - gbl_revision = 2; - status = osl_get_bios_table(ACPI_SIG_XSDT, 0, - ACPI_CAST_PTR(struct - acpi_table_header *, - &gbl_xsdt), &address); - if (ACPI_FAILURE(status)) { - return (status); + /* Get RSDT from memory */ + + if (gbl_rsdp.rsdt_physical_address) { + if (gbl_rsdt) { + free(gbl_rsdt); + gbl_rsdt = NULL; + } + + status = osl_get_bios_table(ACPI_SIG_RSDT, 0, + ACPI_CAST_PTR(struct + acpi_table_header + *, &gbl_rsdt), + &address); + if (ACPI_FAILURE(status)) { + return (status); + } } - } - /* Get RSDT from memory */ + /* Get FADT from memory */ - if (gbl_rsdp.rsdt_physical_address) { - if (gbl_rsdt) { - free(gbl_rsdt); - gbl_rsdt = NULL; + if (gbl_fadt) { + free(gbl_fadt); + gbl_fadt = NULL; } - status = osl_get_bios_table(ACPI_SIG_RSDT, 0, + status = osl_get_bios_table(ACPI_SIG_FADT, 0, ACPI_CAST_PTR(struct acpi_table_header *, - &gbl_rsdt), &address); + &gbl_fadt), + &gbl_fadt_address); if (ACPI_FAILURE(status)) { return (status); } - } - - /* Get FADT from memory */ - - if (gbl_fadt) { - free(gbl_fadt); - gbl_fadt = NULL; - } - - status = osl_get_bios_table(ACPI_SIG_FADT, 0, - ACPI_CAST_PTR(struct acpi_table_header *, - &gbl_fadt), - &gbl_fadt_address); - if (ACPI_FAILURE(status)) { - return (status); - } - - if (!gbl_dump_customized_tables) { /* Add mandatory tables to global table list first */ @@ -961,7 +964,7 @@ osl_get_bios_table(char *signature, goto exit; } - ACPI_MEMCPY(local_table, mapped_table, table_length); + memcpy(local_table, mapped_table, table_length); *address = table_address; *table = local_table; diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index 0b1fa290245a..44ad4889d468 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -54,7 +54,7 @@ ACPI_MODULE_NAME("osunixmap") #ifndef O_BINARY #define O_BINARY 0 #endif -#ifdef _free_BSD +#if defined(_dragon_fly) || defined(_free_BSD) #define MMAP_FLAGS MAP_SHARED #else #define MMAP_FLAGS MAP_PRIVATE diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index 84bdef0136cb..eed534481434 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -66,7 +66,7 @@ EXTERN u8 INIT_GLOBAL(gbl_summary_mode, FALSE); EXTERN u8 INIT_GLOBAL(gbl_verbose_mode, FALSE); EXTERN u8 INIT_GLOBAL(gbl_binary_mode, FALSE); -EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, FALSE); +EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, TRUE); EXTERN u8 INIT_GLOBAL(gbl_do_not_dump_xsdt, FALSE); EXTERN ACPI_FILE INIT_GLOBAL(gbl_output_file, NULL); EXTERN char INIT_GLOBAL(*gbl_output_filename, NULL); diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index c736adf5fb55..61d0de804b70 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -329,7 +329,7 @@ int ap_dump_table_by_name(char *signature) acpi_status status; int table_status; - if (ACPI_STRLEN(signature) != ACPI_NAME_SIZE) { + if (strlen(signature) != ACPI_NAME_SIZE) { acpi_log_error ("Invalid table signature [%s]: must be exactly 4 characters\n", signature); @@ -338,15 +338,15 @@ int ap_dump_table_by_name(char *signature) /* Table signatures are expected to be uppercase */ - ACPI_STRCPY(local_signature, signature); + strcpy(local_signature, signature); acpi_ut_strupr(local_signature); /* To be friendly, handle tables whose signatures do not match the name */ if (ACPI_COMPARE_NAME(local_signature, "FADT")) { - ACPI_STRCPY(local_signature, ACPI_SIG_FADT); + strcpy(local_signature, ACPI_SIG_FADT); } else if (ACPI_COMPARE_NAME(local_signature, "MADT")) { - ACPI_STRCPY(local_signature, ACPI_SIG_MADT); + strcpy(local_signature, ACPI_SIG_MADT); } /* Dump all instances of this signature (to handle multiple SSDTs) */ diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index 8f2fe168228e..a37f9702b2a9 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -136,10 +136,10 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) } else { ACPI_MOVE_NAME(filename, table->signature); } - filename[0] = (char)ACPI_TOLOWER(filename[0]); - filename[1] = (char)ACPI_TOLOWER(filename[1]); - filename[2] = (char)ACPI_TOLOWER(filename[2]); - filename[3] = (char)ACPI_TOLOWER(filename[3]); + filename[0] = (char)tolower((int)filename[0]); + filename[1] = (char)tolower((int)filename[1]); + filename[2] = (char)tolower((int)filename[2]); + filename[3] = (char)tolower((int)filename[3]); filename[ACPI_NAME_SIZE] = 0; /* Handle multiple SSDts - create different filenames for each */ @@ -147,10 +147,10 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) if (instance > 0) { acpi_ut_snprintf(instance_str, sizeof(instance_str), "%u", instance); - ACPI_STRCAT(filename, instance_str); + strcat(filename, instance_str); } - ACPI_STRCAT(filename, ACPI_TABLE_FILE_SUFFIX); + strcat(filename, ACPI_TABLE_FILE_SUFFIX); if (gbl_verbose_mode) { acpi_log_error diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index d0ba6535f5af..57620f66ae6c 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -80,7 +80,7 @@ struct ap_dump_action action_table[AP_MAX_ACTIONS]; u32 current_action = 0; #define AP_UTILITY_NAME "ACPI Binary Table Dump Utility" -#define AP_SUPPORTED_OPTIONS "?a:bcf:hn:o:r:svxz" +#define AP_SUPPORTED_OPTIONS "?a:bc:f:hn:o:r:svxz" /****************************************************************************** * @@ -96,7 +96,6 @@ static void ap_display_usage(void) ACPI_USAGE_HEADER("acpidump [options]"); ACPI_OPTION("-b", "Dump tables to binary files"); - ACPI_OPTION("-c", "Dump customized tables"); ACPI_OPTION("-h -?", "This help message"); ACPI_OPTION("-o <File>", "Redirect output to file"); ACPI_OPTION("-r <Address>", "Dump tables from specified RSDP"); @@ -107,6 +106,7 @@ static void ap_display_usage(void) ACPI_USAGE_TEXT("\nTable Options:\n"); ACPI_OPTION("-a <Address>", "Get table via a physical address"); + ACPI_OPTION("-c <on|off>", "Turning on/off customized table dumping"); ACPI_OPTION("-f <BinaryFile>", "Get table via a binary file"); ACPI_OPTION("-n <Signature>", "Get table via a name/signature"); ACPI_OPTION("-x", "Do not use but dump XSDT"); @@ -181,7 +181,16 @@ static int ap_do_options(int argc, char **argv) case 'c': /* Dump customized tables */ - gbl_dump_customized_tables = TRUE; + if (!strcmp(acpi_gbl_optarg, "on")) { + gbl_dump_customized_tables = TRUE; + } else if (!strcmp(acpi_gbl_optarg, "off")) { + gbl_dump_customized_tables = FALSE; + } else { + acpi_log_error + ("%s: Cannot handle this switch, please use on|off\n", + acpi_gbl_optarg); + return (-1); + } continue; case 'h': |