diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-04 11:10:18 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-04 11:10:18 -0700 |
commit | 352712274507645b6f82b8763977ad87321919a3 (patch) | |
tree | 62ec7872a91e8e984f557f96496f2a369631a068 /drivers/dma | |
parent | 88a99886c26fec8bf662e7b6bc080431a8660326 (diff) | |
parent | ab98193dace971f4742eebb5103212e23bb392f5 (diff) |
Merge tag 'dmaengine-4.3-rc1' of git://git.infradead.org/users/vkoul/slave-dma
Pull dmaengine updates from Vinod Koul:
"This time we have aded a new capability for scatter-gathered memset
using dmaengine APIs. This is supported in xdmac & hdmac drivers
We have added support for reusing descriptors for examples like video
buffers etc. Driver will follow
The behaviour of descriptor ack has been clarified and documented
New devices added are:
- dma controller in sun[457]i SoCs
- lpc18xx dmamux
- ZTE ZX296702 dma controller
- Analog Devices AXI-DMAC DMA controller
- eDMA support for dma-crossbar
- imx6sx support in imx-sdma driver
- imx-sdma device to device support
Other:
- jz4780 fixes
- ioatdma large refactor and cleanup for removal of ioat v1 and v2
which is deprecated and fixes
- ACPI support in X-Gene DMA engine driver
- ipu irq fixes
- mvxor fixes
- minor fixes spread thru drivers"
[ The Kconfig and Makefile entries got re-sorted alphabetically, and I
handled the conflict with the new Intel integrated IDMA driver by
slightly mis-sorting it on purpose: "IDMA64" got sorted after "IMX" in
order to keep the Intel entries together. I think it might be a good
idea to just rename the IDMA64 config entry to INTEL_IDMA64 to make
the sorting be a true sort, not this mismash.
Also, this merge disables the COMPILE_TEST for the sun4i DMA
controller, because it does not compile cleanly at all. - Linus ]
* tag 'dmaengine-4.3-rc1' of git://git.infradead.org/users/vkoul/slave-dma: (89 commits)
dmaengine: ioatdma: add Broadwell EP ioatdma PCI dev IDs
dmaengine :ipu: change ipu_irq_handler() to remove compile warning
dmaengine: ioatdma: Fix variable array length
dmaengine: ioatdma: fix sparse "error" with prep lock
dmaengine: hdmac: Add memset capabilities
dmaengine: sort the sh Makefile
dmaengine: sort the sh Kconfig
dmaengine: sort the dw Kconfig
dmaengine: sort the Kconfig
dmaengine: sort the makefile
drivers/dma: make mv_xor.c driver explicitly non-modular
dmaengine: Add support for the Analog Devices AXI-DMAC DMA controller
devicetree: Add bindings documentation for Analog Devices AXI-DMAC
dmaengine: xgene-dma: Fix the lock to allow client for further submission of requests
dmaengine: ioatdma: fix coccinelle warning
dmaengine: ioatdma: fix zero day warning on incompatible pointer type
dmaengine: tegra-apb: Simplify locking for device using global pause
dmaengine: tegra-apb: Remove unnecessary return statements and variables
dmaengine: tegra-apb: Avoid unnecessary channel base address calculation
dmaengine: tegra-apb: Remove unused variables
...
Diffstat (limited to 'drivers/dma')
48 files changed, 7606 insertions, 5168 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index bdbbe5bcfb83..b4584757dae0 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -33,27 +33,29 @@ if DMADEVICES comment "DMA Devices" -config INTEL_MIC_X100_DMA - tristate "Intel MIC X100 DMA Driver" - depends on 64BIT && X86 && INTEL_MIC_BUS - select DMA_ENGINE - help - This enables DMA support for the Intel Many Integrated Core - (MIC) family of PCIe form factor coprocessor X100 devices that - run a 64 bit Linux OS. This driver will be used by both MIC - host and card drivers. - - If you are building host kernel with a MIC device or a card - kernel for a MIC device, then say M (recommended) or Y, else - say N. If unsure say N. +#core +config ASYNC_TX_ENABLE_CHANNEL_SWITCH + bool - More information about the Intel MIC family as well as the Linux - OS and tools for MIC to use with this driver are available from - <http://software.intel.com/en-us/mic-developer>. +config ARCH_HAS_ASYNC_TX_FIND_CHANNEL + bool -config ASYNC_TX_ENABLE_CHANNEL_SWITCH +config DMA_ENGINE bool +config DMA_VIRTUAL_CHANNELS + tristate + +config DMA_ACPI + def_bool y + depends on ACPI + +config DMA_OF + def_bool y + depends on OF + select DMA_ENGINE + +#devices config AMBA_PL08X bool "ARM PrimeCell PL080 or PL081 support" depends on ARM_AMBA @@ -63,51 +65,112 @@ config AMBA_PL08X Platform has a PL08x DMAC device which can provide DMA engine support -config INTEL_IOATDMA - tristate "Intel I/OAT DMA support" - depends on PCI && X86 +config AMCC_PPC440SPE_ADMA + tristate "AMCC PPC440SPe ADMA support" + depends on 440SPe || 440SP select DMA_ENGINE select DMA_ENGINE_RAID - select DCA + select ARCH_HAS_ASYNC_TX_FIND_CHANNEL + select ASYNC_TX_ENABLE_CHANNEL_SWITCH help - Enable support for the Intel(R) I/OAT DMA engine present - in recent Intel Xeon chipsets. + Enable support for the AMCC PPC440SPe RAID engines. - Say Y here if you have such a chipset. +config AT_HDMAC + tristate "Atmel AHB DMA support" + depends on ARCH_AT91 + select DMA_ENGINE + help + Support the Atmel AHB DMA controller. - If unsure, say N. +config AT_XDMAC + tristate "Atmel XDMA support" + depends on ARCH_AT91 + select DMA_ENGINE + help + Support the Atmel XDMA controller. -config INTEL_IOP_ADMA - tristate "Intel IOP ADMA support" - depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX +config AXI_DMAC + tristate "Analog Devices AXI-DMAC DMA support" + depends on MICROBLAZE || NIOS2 || ARCH_ZYNQ || ARCH_SOCFPGA || COMPILE_TEST select DMA_ENGINE - select ASYNC_TX_ENABLE_CHANNEL_SWITCH + select DMA_VIRTUAL_CHANNELS help - Enable support for the Intel(R) IOP Series RAID engines. + Enable support for the Analog Devices AXI-DMAC peripheral. This DMA + controller is often used in Analog Device's reference designs for FPGA + platforms. -config IDMA64 - tristate "Intel integrated DMA 64-bit support" +config COH901318 + bool "ST-Ericsson COH901318 DMA support" + select DMA_ENGINE + depends on ARCH_U300 + help + Enable support for ST-Ericsson COH 901 318 DMA. + +config DMA_BCM2835 + tristate "BCM2835 DMA engine support" + depends on ARCH_BCM2835 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + +config DMA_JZ4740 + tristate "JZ4740 DMA support" + depends on MACH_JZ4740 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + +config DMA_JZ4780 + tristate "JZ4780 DMA support" + depends on MACH_JZ4780 select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help - Enable DMA support for Intel Low Power Subsystem such as found on - Intel Skylake PCH. + This selects support for the DMA controller in Ingenic JZ4780 SoCs. + If you have a board based on such a SoC and wish to use DMA for + devices which can use the DMA controller, say Y or M here. -source "drivers/dma/dw/Kconfig" +config DMA_OMAP + tristate "OMAP DMA support" + depends on ARCH_OMAP + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + select TI_DMA_CROSSBAR if SOC_DRA7XX -config AT_HDMAC - tristate "Atmel AHB DMA support" - depends on ARCH_AT91 +config DMA_SA11X0 + tristate "SA-11x0 DMA support" + depends on ARCH_SA1100 select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS help - Support the Atmel AHB DMA controller. + Support the DMA engine found on Intel StrongARM SA-1100 and + SA-1110 SoCs. This DMA engine can only be used with on-chip + devices. -config AT_XDMAC - tristate "Atmel XDMA support" - depends on ARCH_AT91 +config DMA_SUN4I + tristate "Allwinner A10 DMA SoCs support" + depends on MACH_SUN4I || MACH_SUN5I || MACH_SUN7I + default (MACH_SUN4I || MACH_SUN5I || MACH_SUN7I) select DMA_ENGINE + select DMA_OF + select DMA_VIRTUAL_CHANNELS help - Support the Atmel XDMA controller. + Enable support for the DMA controller present in the sun4i, + sun5i and sun7i Allwinner ARM SoCs. + +config DMA_SUN6I + tristate "Allwinner A31 SoCs DMA support" + depends on MACH_SUN6I || MACH_SUN8I || COMPILE_TEST + depends on RESET_CONTROLLER + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support for the DMA engine first found in Allwinner A31 SoCs. + +config EP93XX_DMA + bool "Cirrus Logic EP93xx DMA support" + depends on ARCH_EP93XX + select DMA_ENGINE + help + Enable support for the Cirrus Logic EP93xx M2P/M2M DMA controller. config FSL_DMA tristate "Freescale Elo series DMA support" @@ -120,6 +183,16 @@ config FSL_DMA EloPlus is on mpc85xx and mpc86xx and Pxxx parts, and the Elo3 is on some Txxx and Bxxx parts. +config FSL_EDMA + tristate "Freescale eDMA engine support" + depends on OF + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the Freescale eDMA engine with programmable channel + multiplexing capability for DMA request sources(slot). + This module can be found on Freescale Vybrid and LS-1 SoCs. + config FSL_RAID tristate "Freescale RAID engine Support" depends on FSL_SOC && !ASYNC_TX_ENABLE_CHANNEL_SWITCH @@ -131,153 +204,175 @@ config FSL_RAID the capability to offload memcpy, xor and pq computation for raid5/6. -source "drivers/dma/hsu/Kconfig" - -config MPC512X_DMA - tristate "Freescale MPC512x built-in DMA engine support" - depends on PPC_MPC512x || PPC_MPC831x +config IMG_MDC_DMA + tristate "IMG MDC support" + depends on MIPS || COMPILE_TEST + depends on MFD_SYSCON select DMA_ENGINE - ---help--- - Enable support for the Freescale MPC512x built-in DMA engine. - -source "drivers/dma/bestcomm/Kconfig" + select DMA_VIRTUAL_CHANNELS + help + Enable support for the IMG multi-threaded DMA controller (MDC). -config MV_XOR - bool "Marvell XOR engine support" - depends on PLAT_ORION +config IMX_DMA + tristate "i.MX DMA support" + depends on ARCH_MXC select DMA_ENGINE - select DMA_ENGINE_RAID - select ASYNC_TX_ENABLE_CHANNEL_SWITCH - ---help--- - Enable support for the Marvell XOR engine. + help + Support the i.MX DMA engine. This engine is integrated into + Freescale i.MX1/21/27 chips. -config MX3_IPU - bool "MX3x Image Processing Unit support" +config IMX_SDMA + tristate "i.MX SDMA support" depends on ARCH_MXC select DMA_ENGINE - default y help - If you plan to use the Image Processing unit in the i.MX3x, say - Y here. If unsure, select Y. + Support the i.MX SDMA engine. This engine is integrated into + Freescale i.MX25/31/35/51/53/6 chips. -config MX3_IPU_IRQS - int "Number of dynamically mapped interrupts for IPU" - depends on MX3_IPU - range 2 137 - default 4 +config IDMA64 + tristate "Intel integrated DMA 64-bit support" + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS help - Out of 137 interrupt sources on i.MX31 IPU only very few are used. - To avoid bloating the irq_desc[] array we allocate a sufficient - number of IRQ slots and map them dynamically to specific sources. + Enable DMA support for Intel Low Power Subsystem such as found on + Intel Skylake PCH. -config PXA_DMA - bool "PXA DMA support" - depends on (ARCH_MMP || ARCH_PXA) +config INTEL_IOATDMA + tristate "Intel I/OAT DMA support" + depends on PCI && X86_64 select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS + select DMA_ENGINE_RAID + select DCA help - Support the DMA engine for PXA. It is also compatible with MMP PDMA - platform. The internal DMA IP of all PXA variants is supported, with - 16 to 32 channels for peripheral to memory or memory to memory - transfers. + Enable support for the Intel(R) I/OAT DMA engine present + in recent Intel Xeon chipsets. -config TXX9_DMAC - tristate "Toshiba TXx9 SoC DMA support" - depends on MACH_TX49XX || MACH_TX39XX + Say Y here if you have such a chipset. + + If unsure, say N. + +config INTEL_IOP_ADMA + tristate "Intel IOP ADMA support" + depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX select DMA_ENGINE + select ASYNC_TX_ENABLE_CHANNEL_SWITCH help - Support the TXx9 SoC internal DMA controller. This can be - integrated in chips such as the Toshiba TX4927/38/39. + Enable support for the Intel(R) IOP Series RAID engines. -config TEGRA20_APB_DMA - bool "NVIDIA Tegra20 APB DMA support" - depends on ARCH_TEGRA +config INTEL_MIC_X100_DMA + tristate "Intel MIC X100 DMA Driver" + depends on 64BIT && X86 && INTEL_MIC_BUS select DMA_ENGINE help - Support for the NVIDIA Tegra20 APB DMA controller driver. The - DMA controller is having multiple DMA channel which can be - configured for different peripherals like audio, UART, SPI, - I2C etc which is in APB bus. - This DMA controller transfers data from memory to peripheral fifo - or vice versa. It does not support memory to memory data transfer. + This enables DMA support for the Intel Many Integrated Core + (MIC) family of PCIe form factor coprocessor X100 devices that + run a 64 bit Linux OS. This driver will be used by both MIC + host and card drivers. -config S3C24XX_DMAC - tristate "Samsung S3C24XX DMA support" - depends on ARCH_S3C24XX + If you are building host kernel with a MIC device or a card + kernel for a MIC device, then say M (recommended) or Y, else + say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + <http://software.intel.com/en-us/mic-developer>. + +config K3_DMA + tristate "Hisilicon K3 DMA support" + depends on ARCH_HI3xxx select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help - Support for the Samsung S3C24XX DMA controller driver. The - DMA controller is having multiple DMA channels which can be - configured for different peripherals like audio, UART, SPI. - The DMA controller can transfer data from memory to peripheral, - periphal to memory, periphal to periphal and memory to memory. + Support the DMA engine for Hisilicon K3 platform + devices. -source "drivers/dma/sh/Kconfig" +config LPC18XX_DMAMUX + bool "NXP LPC18xx/43xx DMA MUX for PL080" + depends on ARCH_LPC18XX || COMPILE_TEST + depends on OF && AMBA_PL08X + select MFD_SYSCON + help + Enable support for DMA on NXP LPC18xx/43xx platforms + with PL080 and multiplexed DMA request lines. -config COH901318 - bool "ST-Ericsson COH901318 DMA support" +config MMP_PDMA + bool "MMP PDMA support" + depends on (ARCH_MMP || ARCH_PXA) select DMA_ENGINE - depends on ARCH_U300 help - Enable support for ST-Ericsson COH 901 318 DMA. + Support the MMP PDMA engine for PXA and MMP platform. -config STE_DMA40 - bool "ST-Ericsson DMA40 support" - depends on ARCH_U8500 +config MMP_TDMA + bool "MMP Two-Channel DMA support" + depends on ARCH_MMP select DMA_ENGINE + select MMP_SRAM help - Support for ST-Ericsson DMA40 controller + Support the MMP Two-Channel DMA engine. + This engine used for MMP Audio DMA and pxa910 SQU. + It needs sram driver under mach-mmp. -config AMCC_PPC440SPE_ADMA - tristate "AMCC PPC440SPe ADMA support" - depends on 440SPe || 440SP +config MOXART_DMA + tristate "MOXART DMA support" + depends on ARCH_MOXART select DMA_ENGINE - select DMA_ENGINE_RAID - select ARCH_HAS_ASYNC_TX_FIND_CHANNEL - select ASYNC_TX_ENABLE_CHANNEL_SWITCH + select DMA_OF + select DMA_VIRTUAL_CHANNELS help - Enable support for the AMCC PPC440SPe RAID engines. + Enable support for the MOXA ART SoC DMA controller. + + Say Y here if you enabled MMP ADMA, otherwise say N. -config TIMB_DMA - tristate "Timberdale FPGA DMA support" - depends on MFD_TIMBERDALE +config MPC512X_DMA + tristate "Freescale MPC512x built-in DMA engine support" + depends on PPC_MPC512x || PPC_MPC831x select DMA_ENGINE - help - Enable support for the Timberdale FPGA DMA engine. + ---help--- + Enable support for the Freescale MPC512x built-in DMA engine. -config SIRF_DMA - tristate "CSR SiRFprimaII/SiRFmarco DMA support" - depends on ARCH_SIRF +config MV_XOR + bool "Marvell XOR engine support" + depends on PLAT_ORION select DMA_ENGINE - help - Enable support for the CSR SiRFprimaII DMA engine. + select DMA_ENGINE_RAID + select ASYNC_TX_ENABLE_CHANNEL_SWITCH + ---help--- + Enable support for the Marvell XOR engine. -config TI_EDMA - bool "TI EDMA support" - depends on ARCH_DAVINCI || ARCH_OMAP || ARCH_KEYSTONE +config MXS_DMA + bool "MXS DMA support" + depends on SOC_IMX23 || SOC_IMX28 || SOC_IMX6Q + select STMP_DEVICE select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - select TI_PRIV_EDMA - default n help - Enable support for the TI EDMA controller. This DMA - engine is found on TI DaVinci and AM33xx parts. + Support the MXS DMA engine. This engine including APBH-DMA + and APBX-DMA is integrated into Freescale i.MX23/28/MX6Q/MX6DL chips. -config TI_DMA_CROSSBAR - bool +config MX3_IPU + bool "MX3x Image Processing Unit support" + depends on ARCH_MXC + select DMA_ENGINE + default y + help + If you plan to use the Image Processing unit in the i.MX3x, say + Y here. If unsure, select Y. -config ARCH_HAS_ASYNC_TX_FIND_CHANNEL - bool +config MX3_IPU_IRQS + int "Number of dynamically mapped interrupts for IPU" + depends on MX3_IPU + range 2 137 + default 4 + help + Out of 137 interrupt sources on i.MX31 IPU only very few are used. + To avoid bloating the irq_desc[] array we allocate a sufficient + number of IRQ slots and map them dynamically to specific sources. -config PL330_DMA - tristate "DMA API Driver for PL330" +config NBPFAXI_DMA + tristate "Renesas Type-AXI NBPF DMA support" select DMA_ENGINE - depends on ARM_AMBA + depends on ARM || COMPILE_TEST help - Select if your platform has one or more PL330 DMACs. - You need to provide platform specific settings via - platform_data for a dma-pl330 device. + Support for "Type-AXI" NBPF DMA IPs from Renesas config PCH_DMA tristate "Intel EG20T PCH / LAPIS Semicon IOH(ML7213/ML7223/ML7831) DMA" @@ -293,72 +388,87 @@ config PCH_DMA ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series. ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH. -config IMX_SDMA - tristate "i.MX SDMA support" - depends on ARCH_MXC +config PL330_DMA + tristate "DMA API Driver for PL330" select DMA_ENGINE + depends on ARM_AMBA help - Support the i.MX SDMA engine. This engine is integrated into - Freescale i.MX25/31/35/51/53/6 chips. + Select if your platform has one or more PL330 DMACs. + You need to provide platform specific settings via + platform_data for a dma-pl330 device. -config IMX_DMA - tristate "i.MX DMA support" - depends on ARCH_MXC +config PXA_DMA + bool "PXA DMA support" + depends on (ARCH_MMP || ARCH_PXA) select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS help - Support the i.MX DMA engine. This engine is integrated into - Freescale i.MX1/21/27 chips. + Support the DMA engine for PXA. It is also compatible with MMP PDMA + platform. The internal DMA IP of all PXA variants is supported, with + 16 to 32 channels for peripheral to memory or memory to memory + transfers. -config MXS_DMA - bool "MXS DMA support" - depends on SOC_IMX23 || SOC_IMX28 || SOC_IMX6Q - select STMP_DEVICE +config QCOM_BAM_DMA + tristate "QCOM BAM DMA support" + depends on ARCH_QCOM || (COMPILE_TEST && OF && ARM) + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + ---help--- + Enable support for the QCOM BAM DMA controller. This controller + provides DMA capabilities for a variety of on-chip devices. + +config SIRF_DMA + tristate "CSR SiRFprimaII/SiRFmarco DMA support" + depends on ARCH_SIRF select DMA_ENGINE help - Support the MXS DMA engine. This engine including APBH-DMA - and APBX-DMA is integrated into Freescale i.MX23/28/MX6Q/MX6DL chips. + Enable support for the CSR SiRFprimaII DMA engine. -config EP93XX_DMA - bool "Cirrus Logic EP93xx DMA support" - depends on ARCH_EP93XX +config STE_DMA40 + bool "ST-Ericsson DMA40 support" + depends on ARCH_U8500 select DMA_ENGINE help - Enable support for the Cirrus Logic EP93xx M2P/M2M DMA controller. + Support for ST-Ericsson DMA40 controller -config DMA_SA11X0 - tristate "SA-11x0 DMA support" - depends on ARCH_SA1100 +config S3C24XX_DMAC + tristate "Samsung S3C24XX DMA support" + depends on ARCH_S3C24XX select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help - Support the DMA engine found on Intel StrongARM SA-1100 and - SA-1110 SoCs. This DMA engine can only be used with on-chip - devices. + Support for the Samsung S3C24XX DMA controller driver. The + DMA controller is having multiple DMA channels which can be + configured for different peripherals like audio, UART, SPI. + The DMA controller can transfer data from memory to peripheral, + periphal to memory, periphal to periphal and memory to memory. -config MMP_TDMA - bool "MMP Two-Channel DMA support" - depends on ARCH_MMP +config TXX9_DMAC + tristate "Toshiba TXx9 SoC DMA support" + depends on MACH_TX49XX || MACH_TX39XX select DMA_ENGINE - select MMP_SRAM help - Support the MMP Two-Channel DMA engine. - This engine used for MMP Audio DMA and pxa910 SQU. - It needs sram driver under mach-mmp. - - Say Y here if you enabled MMP ADMA, otherwise say N. + Support the TXx9 SoC internal DMA controller. This can be + integrated in chips such as the Toshiba TX4927/38/39. -config DMA_OMAP - tristate "OMAP DMA support" - depends on ARCH_OMAP +config TEGRA20_APB_DMA + bool "NVIDIA Tegra20 APB DMA support" + depends on ARCH_TEGRA select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - select TI_DMA_CROSSBAR if SOC_DRA7XX + help + Support for the NVIDIA Tegra20 APB DMA controller driver. The + DMA controller is having multiple DMA channel which can be + configured for different peripherals like audio, UART, SPI, + I2C etc which is in APB bus. + This DMA controller transfers data from memory to peripheral fifo + or vice versa. It does not support memory to memory data transfer. -config DMA_BCM2835 - tristate "BCM2835 DMA engine support" - depends on ARCH_BCM2835 +config TIMB_DMA + tristate "Timberdale FPGA DMA support" + depends on MFD_TIMBERDALE select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS + help + Enable support for the Timberdale FPGA DMA engine. config TI_CPPI41 tristate "AM33xx CPPI41 DMA support" @@ -368,56 +478,28 @@ config TI_CPPI41 The Communications Port Programming Interface (CPPI) 4.1 DMA engine is currently used by the USB driver on AM335x platforms. -config MMP_PDMA - bool "MMP PDMA support" - depends on (ARCH_MMP || ARCH_PXA) - select DMA_ENGINE - help - Support the MMP PDMA engine for PXA and MMP platform. - -config DMA_JZ4740 - tristate "JZ4740 DMA support" - depends on MACH_JZ4740 - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - -config DMA_JZ4780 - tristate "JZ4780 DMA support" - depends on MACH_JZ4780 - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - help - This selects support for the DMA controller in Ingenic JZ4780 SoCs. - If you have a board based on such a SoC and wish to use DMA for - devices which can use the DMA controller, say Y or M here. +config TI_DMA_CROSSBAR + bool -config K3_DMA - tristate "Hisilicon K3 DMA support" - depends on ARCH_HI3xxx +config TI_EDMA + bool "TI EDMA support" + depends on ARCH_DAVINCI || ARCH_OMAP || ARCH_KEYSTONE select DMA_ENGINE select DMA_VIRTUAL_CHANNELS + select TI_PRIV_EDMA + default n help - Support the DMA engine for Hisilicon K3 platform - devices. + Enable support for the TI EDMA controller. This DMA + engine is found on TI DaVinci and AM33xx parts. -config MOXART_DMA - tristate "MOXART DMA support" - depends on ARCH_MOXART - select DMA_ENGINE - select DMA_OF - select DMA_VIRTUAL_CHANNELS - help - Enable support for the MOXA ART SoC DMA controller. - -config FSL_EDMA - tristate "Freescale eDMA engine support" - depends on OF +config XGENE_DMA + tristate "APM X-Gene DMA support" + depends on ARCH_XGENE || COMPILE_TEST select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS + select DMA_ENGINE_RAID + select ASYNC_TX_ENABLE_CHANNEL_SWITCH help - Support the Freescale eDMA engine with programmable channel - multiplexing capability for DMA request sources(slot). - This module can be found on Freescale Vybrid and LS-1 SoCs. + Enable support for the APM X-Gene SoC DMA engine. config XILINX_VDMA tristate "Xilinx AXI VDMA Engine" @@ -433,55 +515,25 @@ config XILINX_VDMA channels, Memory Mapped to Stream (MM2S) and Stream to Memory Mapped (S2MM) for the data transfers. -config DMA_SUN6I - tristate "Allwinner A31 SoCs DMA support" - depends on MACH_SUN6I || MACH_SUN8I || COMPILE_TEST - depends on RESET_CONTROLLER - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - help - Support for the DMA engine first found in Allwinner A31 SoCs. - -config NBPFAXI_DMA - tristate "Renesas Type-AXI NBPF DMA support" - select DMA_ENGINE - depends on ARM || COMPILE_TEST - help - Support for "Type-AXI" NBPF DMA IPs from Renesas - -config IMG_MDC_DMA - tristate "IMG MDC support" - depends on MIPS || COMPILE_TEST - depends on MFD_SYSCON +config ZX_DMA + tristate "ZTE ZX296702 DMA support" + depends on ARCH_ZX select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help - Enable support for the IMG multi-threaded DMA controller (MDC). + Support the DMA engine for ZTE ZX296702 platform devices. -config XGENE_DMA - tristate "APM X-Gene DMA support" - depends on ARCH_XGENE || COMPILE_TEST - select DMA_ENGINE - select DMA_ENGINE_RAID - select ASYNC_TX_ENABLE_CHANNEL_SWITCH - help - Enable support for the APM X-Gene SoC DMA engine. -config DMA_ENGINE - bool +# driver files +source "drivers/dma/bestcomm/Kconfig" -config DMA_VIRTUAL_CHANNELS - tristate +source "drivers/dma/dw/Kconfig" -config DMA_ACPI - def_bool y - depends on ACPI +source "drivers/dma/hsu/Kconfig" -config DMA_OF - def_bool y - depends on OF - select DMA_ENGINE +source "drivers/dma/sh/Kconfig" +# clients comment "DMA Clients" depends on DMA_ENGINE @@ -506,13 +558,4 @@ config DMATEST config DMA_ENGINE_RAID bool -config QCOM_BAM_DMA - tristate "QCOM BAM DMA support" - depends on ARCH_QCOM || (COMPILE_TEST && OF && ARM) - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - ---help--- - Enable support for the QCOM BAM DMA controller. This controller - provides DMA capabilities for a variety of on-chip devices. - endif diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 56ff8c705c00..7711a7180726 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -1,59 +1,69 @@ +#dmaengine debug flags subdir-ccflags-$(CONFIG_DMADEVICES_DEBUG) := -DDEBUG subdir-ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG +#core obj-$(CONFIG_DMA_ENGINE) += dmaengine.o obj-$(CONFIG_DMA_VIRTUAL_CHANNELS) += virt-dma.o obj-$(CONFIG_DMA_ACPI) += acpi-dma.o obj-$(CONFIG_DMA_OF) += of-dma.o +#dmatest obj-$(CONFIG_DMATEST) += dmatest.o -obj-$(CONFIG_INTEL_IOATDMA) += ioat/ -obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o -obj-$(CONFIG_FSL_DMA) += fsldma.o -obj-$(CONFIG_HSU_DMA) += hsu/ -obj-$(CONFIG_MPC512X_DMA) += mpc512x_dma.o -obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/ -obj-$(CONFIG_MV_XOR) += mv_xor.o -obj-$(CONFIG_IDMA64) += idma64.o -obj-$(CONFIG_DW_DMAC_CORE) += dw/ + +#devices +obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o +obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ obj-$(CONFIG_AT_HDMAC) += at_hdmac.o obj-$(CONFIG_AT_XDMAC) += at_xdmac.o -obj-$(CONFIG_MX3_IPU) += ipu/ -obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o -obj-$(CONFIG_RENESAS_DMA) += sh/ +obj-$(CONFIG_AXI_DMAC) += dma-axi-dmac.o obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o -obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ -obj-$(CONFIG_IMX_SDMA) += imx-sdma.o +obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o +obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o +obj-$(CONFIG_DMA_JZ4780) += dma-jz4780.o +obj-$(CONFIG_DMA_OMAP) += omap-dma.o +obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o +obj-$(CONFIG_DMA_SUN4I) += sun4i-dma.o +obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o +obj-$(CONFIG_DW_DMAC_CORE) += dw/ +obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o +obj-$(CONFIG_FSL_DMA) += fsldma.o +obj-$(CONFIG_FSL_EDMA) += fsl-edma.o +obj-$(CONFIG_FSL_RAID) += fsl_raid.o +obj-$(CONFIG_HSU_DMA) += hsu/ +obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o obj-$(CONFIG_IMX_DMA) += imx-dma.o +obj-$(CONFIG_IMX_SDMA) += imx-sdma.o +obj-$(CONFIG_IDMA64) += idma64.o +obj-$(CONFIG_INTEL_IOATDMA) += ioat/ +obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o +obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o +obj-$(CONFIG_K3_DMA) += k3dma.o +obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o +obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o +obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o +obj-$(CONFIG_MOXART_DMA) += moxart-dma.o +obj-$(CONFIG_MPC512X_DMA) += mpc512x_dma.o +obj-$(CONFIG_MV_XOR) += mv_xor.o obj-$(CONFIG_MXS_DMA) += mxs-dma.o +obj-$(CONFIG_MX3_IPU) += ipu/ +obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o +obj-$(CONFIG_PCH_DMA) += pch_dma.o +obj-$(CONFIG_PL330_DMA) += pl330.o +obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/ obj-$(CONFIG_PXA_DMA) += pxa_dma.o -obj-$(CONFIG_TIMB_DMA) += timb_dma.o +obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o +obj-$(CONFIG_RENESAS_DMA) += sh/ obj-$(CONFIG_SIRF_DMA) += sirf-dma.o -obj-$(CONFIG_TI_EDMA) += edma.o obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o -obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o -obj-$(CONFIG_PL330_DMA) += pl330.o -obj-$(CONFIG_PCH_DMA) += pch_dma.o -obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o -obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o -obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o -obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o -obj-$(CONFIG_DMA_OMAP) += omap-dma.o -obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-crossbar.o -obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o -obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o -obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o -obj-$(CONFIG_DMA_JZ4780) += dma-jz4780.o +obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o +obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o +obj-$(CONFIG_TIMB_DMA) += timb_dma.o obj-$(CONFIG_TI_CPPI41) += cppi41.o -obj-$(CONFIG_K3_DMA) += k3dma.o -obj-$(CONFIG_MOXART_DMA) += moxart-dma.o -obj-$(CONFIG_FSL_RAID) += fsl_raid.o -obj-$(CONFIG_FSL_EDMA) += fsl-edma.o -obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o -obj-y += xilinx/ -obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o -obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o -obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o -obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o +obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-crossbar.o +obj-$(CONFIG_TI_EDMA) += edma.o obj-$(CONFIG_XGENE_DMA) += xgene-dma.o +obj-$(CONFIG_ZX_DMA) += zx296702_dma.o + +obj-y += xilinx/ diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 5de3cf453f35..9b42c0588550 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -83,6 +83,8 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/of.h> +#include <linux/of_dma.h> #include <linux/pm_runtime.h> #include <linux/seq_file.h> #include <linux/slab.h> @@ -2030,10 +2032,188 @@ static inline void init_pl08x_debugfs(struct pl08x_driver_data *pl08x) } #endif +#ifdef CONFIG_OF +static struct dma_chan *pl08x_find_chan_id(struct pl08x_driver_data *pl08x, + u32 id) +{ + struct pl08x_dma_chan *chan; + + list_for_each_entry(chan, &pl08x->slave.channels, vc.chan.device_node) { + if (chan->signal == id) + return &chan->vc.chan; + } + + return NULL; +} + +static struct dma_chan *pl08x_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct pl08x_driver_data *pl08x = ofdma->of_dma_data; + struct pl08x_channel_data *data; + struct pl08x_dma_chan *chan; + struct dma_chan *dma_chan; + + if (!pl08x) + return NULL; + + if (dma_spec->args_count != 2) + return NULL; + + dma_chan = pl08x_find_chan_id(pl08x, dma_spec->args[0]); + if (dma_chan) + return dma_get_slave_channel(dma_chan); + + chan = devm_kzalloc(pl08x->slave.dev, sizeof(*chan) + sizeof(*data), + GFP_KERNEL); + if (!chan) + return NULL; + + data = (void *)&chan[1]; + data->bus_id = "(none)"; + data->periph_buses = dma_spec->args[1]; + + chan->cd = data; + chan->host = pl08x; + chan->slave = true; + chan->name = data->bus_id; + chan->state = PL08X_CHAN_IDLE; + chan->signal = dma_spec->args[0]; + chan->vc.desc_free = pl08x_desc_free; + + vchan_init(&chan->vc, &pl08x->slave); + + return dma_get_slave_channel(&chan->vc.chan); +} + +static int pl08x_of_probe(struct amba_device *adev, + struct pl08x_driver_data *pl08x, + struct device_node *np) +{ + struct pl08x_platform_data *pd; + u32 cctl_memcpy = 0; + u32 val; + int ret; + + pd = devm_kzalloc(&adev->dev, sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + /* Eligible bus masters for fetching LLIs */ + if (of_property_read_bool(np, "lli-bus-interface-ahb1")) + pd->lli_buses |= PL08X_AHB1; + if (of_property_read_bool(np, "lli-bus-interface-ahb2")) + pd->lli_buses |= PL08X_AHB2; + if (!pd->lli_buses) { + dev_info(&adev->dev, "no bus masters for LLIs stated, assume all\n"); + pd->lli_buses |= PL08X_AHB1 | PL08X_AHB2; + } + + /* Eligible bus masters for memory access */ + if (of_property_read_bool(np, "mem-bus-interface-ahb1")) + pd->mem_buses |= PL08X_AHB1; + if (of_property_read_bool(np, "mem-bus-interface-ahb2")) + pd->mem_buses |= PL08X_AHB2; + if (!pd->mem_buses) { + dev_info(&adev->dev, "no bus masters for memory stated, assume all\n"); + pd->mem_buses |= PL08X_AHB1 | PL08X_AHB2; + } + + /* Parse the memcpy channel properties */ + ret = of_property_read_u32(np, "memcpy-burst-size", &val); + if (ret) { + dev_info(&adev->dev, "no memcpy burst size specified, using 1 byte\n"); + val = 1; + } + switch (val) { + default: + dev_err(&adev->dev, "illegal burst size for memcpy, set to 1\n"); + /* Fall through */ + case 1: + cctl_memcpy |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 4: + cctl_memcpy |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 8: + cctl_memcpy |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 16: + cctl_memcpy |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 32: + cctl_memcpy |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 64: + cctl_memcpy |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 128: + cctl_memcpy |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 256: + cctl_memcpy |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + } + + ret = of_property_read_u32(np, "memcpy-bus-width", &val); + if (ret) { + dev_info(&adev->dev, "no memcpy bus width specified, using 8 bits\n"); + val = 8; + } + switch (val) { + default: + dev_err(&adev->dev, "illegal bus width for memcpy, set to 8 bits\n"); + /* Fall through */ + case 8: + cctl_memcpy |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case 16: + cctl_memcpy |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case 32: + cctl_memcpy |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + } + + /* This is currently the only thing making sense */ + cctl_memcpy |= PL080_CONTROL_PROT_SYS; + + /* Set up memcpy channel */ + pd->memcpy_channel.bus_id = "memcpy"; + pd->memcpy_channel.cctl_memcpy = cctl_memcpy; + /* Use the buses that can access memory, obviously */ + pd->memcpy_channel.periph_buses = pd->mem_buses; + + pl08x->pd = pd; + + return of_dma_controller_register(adev->dev.of_node, pl08x_of_xlate, + pl08x); +} +#else +static inline int pl08x_of_probe(struct amba_device *adev, + struct pl08x_driver_data *pl08x, + struct device_node *np) +{ + return -EINVAL; +} +#endif + static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) { struct pl08x_driver_data *pl08x; const struct vendor_data *vd = id->data; + struct device_node *np = adev->dev.of_node; u32 tsfr_size; int ret = 0; int i; @@ -2093,9 +2273,15 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) /* Get the platform data */ pl08x->pd = dev_get_platdata(&adev->dev); if (!pl08x->pd) { - dev_err(&adev->dev, "no platform data supplied\n"); - ret = -EINVAL; - goto out_no_platdata; + if (np) { + ret = pl08x_of_probe(adev, pl08x, np); + if (ret) + goto out_no_platdata; + } else { + dev_err(&adev->dev, "no platform data supplied\n"); + ret = -EINVAL; + goto out_no_platdata; + } } /* Assign useful pointers to the driver state */ diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index d3629b7482dd..58d406230d89 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -448,6 +448,7 @@ static void atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) { struct dma_async_tx_descriptor *txd = &desc->txd; + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); dev_vdbg(chan2dev(&atchan->chan_common), "descriptor %u complete\n", txd->cookie); @@ -456,6 +457,13 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) if (!atc_chan_is_cyclic(atchan)) dma_cookie_complete(txd); + /* If the transfer was a memset, free our temporary buffer */ + if (desc->memset) { + dma_pool_free(atdma->memset_pool, desc->memset_vaddr, + desc->memset_paddr); + desc->memset = false; + } + /* move children to free_list */ list_splice_init(&desc->tx_list, &atchan->free_list); /* move myself to free_list */ @@ -717,14 +725,14 @@ atc_prep_dma_interleaved(struct dma_chan *chan, size_t len = 0; int i; + if (unlikely(!xt || xt->numf != 1 || !xt->frame_size)) + return NULL; + dev_info(chan2dev(chan), "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n", __func__, xt->src_start, xt->dst_start, xt->numf, xt->frame_size, flags); - if (unlikely(!xt || xt->numf != 1 || !xt->frame_size)) - return NULL; - /* * The controller can only "skip" X bytes every Y bytes, so we * need to make sure we are given a template that fit that @@ -873,6 +881,93 @@ err_desc_get: return NULL; } +/** + * atc_prep_dma_memset - prepare a memcpy operation + * @chan: the channel to prepare operation on + * @dest: operation virtual destination address + * @value: value to set memory buffer to + * @len: operation length + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + struct at_desc *desc = NULL; + size_t xfer_count; + u32 ctrla; + u32 ctrlb; + + dev_vdbg(chan2dev(chan), "%s: d0x%x v0x%x l0x%zx f0x%lx\n", __func__, + dest, value, len, flags); + + if (unlikely(!len)) { + dev_dbg(chan2dev(chan), "%s: length is zero!\n", __func__); + return NULL; + } + + if (!is_dma_fill_aligned(chan->device, dest, 0, len)) { + dev_dbg(chan2dev(chan), "%s: buffer is not aligned\n", + __func__); + return NULL; + } + + xfer_count = len >> 2; + if (xfer_count > ATC_BTSIZE_MAX) { + dev_err(chan2dev(chan), "%s: buffer is too big\n", + __func__); + return NULL; + } + + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN + | ATC_SRC_ADDR_MODE_FIXED + | ATC_DST_ADDR_MODE_INCR + | ATC_FC_MEM2MEM; + + ctrla = ATC_SRC_WIDTH(2) | + ATC_DST_WIDTH(2); + + desc = atc_desc_get(atchan); + if (!desc) { + dev_err(chan2dev(chan), "%s: can't get a descriptor\n", + __func__); + return NULL; + } + + desc->memset_vaddr = dma_pool_alloc(atdma->memset_pool, GFP_ATOMIC, + &desc->memset_paddr); + if (!desc->memset_vaddr) { + dev_err(chan2dev(chan), "%s: couldn't allocate buffer\n", + __func__); + goto err_put_desc; + } + + *desc->memset_vaddr = value; + desc->memset = true; + + desc->lli.saddr = desc->memset_paddr; + desc->lli.daddr = dest; + desc->lli.ctrla = ctrla | xfer_count; + desc->lli.ctrlb = ctrlb; + + desc->txd.cookie = -EBUSY; + desc->len = len; + desc->total_len = len; + + /* set end-of-link on the descriptor */ + set_desc_eol(desc); + + desc->txd.flags = flags; + + return &desc->txd; + +err_put_desc: + atc_desc_put(atchan, desc); + return NULL; +} + /** * atc_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction @@ -1755,6 +1850,8 @@ static int __init at_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_SG, at91sam9rl_config.cap_mask); dma_cap_set(DMA_INTERLEAVE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask); + dma_cap_set(DMA_MEMSET, at91sam9g45_config.cap_mask); + dma_cap_set(DMA_PRIVATE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_SG, at91sam9g45_config.cap_mask); @@ -1818,7 +1915,16 @@ static int __init at_dma_probe(struct platform_device *pdev) if (!atdma->dma_desc_pool) { dev_err(&pdev->dev, "No memory for descriptors dma pool\n"); err = -ENOMEM; - goto err_pool_create; + goto err_desc_pool_create; + } + + /* create a pool of consistent memory blocks for memset blocks */ + atdma->memset_pool = dma_pool_create("at_hdmac_memset_pool", + &pdev->dev, sizeof(int), 4, 0); + if (!atdma->memset_pool) { + dev_err(&pdev->dev, "No memory for memset dma pool\n"); + err = -ENOMEM; + goto err_memset_pool_create; } /* clear any pending interrupt */ @@ -1864,6 +1970,11 @@ static int __init at_dma_probe(struct platform_device *pdev) if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; + if (dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask)) { + atdma->dma_common.device_prep_dma_memset = atc_prep_dma_memset; + atdma->dma_common.fill_align = DMAENGINE_ALIGN_4_BYTES; + } + if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) { atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg; /* controller can do slave DMA: can trigger cyclic transfers */ @@ -1884,8 +1995,9 @@ static int __init at_dma_probe(struct platform_device *pdev) dma_writel(atdma, EN, AT_DMA_ENABLE); - dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s), %d channels\n", + dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s%s), %d channels\n", dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "", + dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask) ? "set " : "", dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "", dma_has_cap(DMA_SG, atdma->dma_common.cap_mask) ? "sg-cpy " : "", plat_dat->nr_channels); @@ -1910,8 +2022,10 @@ static int __init at_dma_probe(struct platform_device *pdev) err_of_dma_controller_register: dma_async_device_unregister(&atdma->dma_common); + dma_pool_destroy(atdma->memset_pool); +err_memset_pool_create: dma_pool_destroy(atdma->dma_desc_pool); -err_pool_create: +err_desc_pool_create: free_irq(platform_get_irq(pdev, 0), atdma); err_irq: clk_disable_unprepare(atdma->clk); @@ -1936,6 +2050,7 @@ static int at_dma_remove(struct platform_device *pdev) at_dma_off(atdma); dma_async_device_unregister(&atdma->dma_common); + dma_pool_destroy(atdma->memset_pool); dma_pool_destroy(atdma->dma_desc_pool); free_irq(platform_get_irq(pdev, 0), atdma); diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index 7f5a08230f76..c3bebbe899ac 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -200,6 +200,11 @@ struct at_desc { size_t boundary; size_t dst_hole; size_t src_hole; + + /* Memset temporary buffer */ + bool memset; + dma_addr_t memset_paddr; + int *memset_vaddr; }; static inline struct at_desc * @@ -330,6 +335,7 @@ struct at_dma { u8 all_chan_mask; struct dma_pool *dma_desc_pool; + struct dma_pool *memset_pool; /* AT THE END channels table */ struct at_dma_chan chan[0]; }; diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 40afa2a16cfc..a165b4bfd330 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -625,12 +625,12 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction direction, unsigned long flags, void *context) { - struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); - struct at_xdmac_desc *first = NULL, *prev = NULL; - struct scatterlist *sg; - int i; - unsigned int xfer_size = 0; - unsigned long irqflags; + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *first = NULL, *prev = NULL; + struct scatterlist *sg; + int i; + unsigned int xfer_size = 0; + unsigned long irqflags; struct dma_async_tx_descriptor *ret = NULL; if (!sgl) @@ -797,10 +797,7 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, list_add_tail(&desc->desc_node, &first->descs_list); } - prev->lld.mbr_nda = first->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", - __func__, prev, &prev->lld.mbr_nda); + at_xdmac_queue_desc(chan, prev, first); first->tx_dma_desc.flags = flags; first->xfer_size = buf_len; first->direction = direction; @@ -1135,7 +1132,7 @@ static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan, * SAMA5D4x), so we can use the same interface for source and dest, * that solves the fact we don't know the direction. */ - u32 chan_cc = AT_XDMAC_CC_DAM_INCREMENTED_AM + u32 chan_cc = AT_XDMAC_CC_DAM_UBS_AM | AT_XDMAC_CC_SAM_INCREMENTED_AM | AT_XDMAC_CC_DIF(0) | AT_XDMAC_CC_SIF(0) @@ -1203,6 +1200,168 @@ at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, return &desc->tx_dma_desc; } +static struct dma_async_tx_descriptor * +at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, int value, + unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *desc, *pdesc = NULL, + *ppdesc = NULL, *first = NULL; + struct scatterlist *sg, *psg = NULL, *ppsg = NULL; + size_t stride = 0, pstride = 0, len = 0; + int i; + + if (!sgl) + return NULL; + + dev_dbg(chan2dev(chan), "%s: sg_len=%d, value=0x%x, flags=0x%lx\n", + __func__, sg_len, value, flags); + + /* Prepare descriptors. */ + for_each_sg(sgl, sg, sg_len, i) { + dev_dbg(chan2dev(chan), "%s: dest=0x%08x, len=%d, pattern=0x%x, flags=0x%lx\n", + __func__, sg_dma_address(sg), sg_dma_len(sg), + value, flags); + desc = at_xdmac_memset_create_desc(chan, atchan, + sg_dma_address(sg), + sg_dma_len(sg), + value); + if (!desc && first) + list_splice_init(&first->descs_list, + &atchan->free_descs_list); + + if (!first) + first = desc; + + /* Update our strides */ + pstride = stride; + if (psg) + stride = sg_dma_address(sg) - + (sg_dma_address(psg) + sg_dma_len(psg)); + + /* + * The scatterlist API gives us only the address and + * length of each elements. + * + * Unfortunately, we don't have the stride, which we + * will need to compute. + * + * That make us end up in a situation like this one: + * len stride len stride len + * +-------+ +-------+ +-------+ + * | N-2 | | N-1 | | N | + * +-------+ +-------+ +-------+ + * + * We need all these three elements (N-2, N-1 and N) + * to actually take the decision on whether we need to + * queue N-1 or reuse N-2. + * + * We will only consider N if it is the last element. + */ + if (ppdesc && pdesc) { + if ((stride == pstride) && + (sg_dma_len(ppsg) == sg_dma_len(psg))) { + dev_dbg(chan2dev(chan), + "%s: desc 0x%p can be merged with desc 0x%p\n", + __func__, pdesc, ppdesc); + + /* + * Increment the block count of the + * N-2 descriptor + */ + at_xdmac_increment_block_count(chan, ppdesc); + ppdesc->lld.mbr_dus = stride; + + /* + * Put back the N-1 descriptor in the + * free descriptor list + */ + list_add_tail(&pdesc->desc_node, + &atchan->free_descs_list); + + /* + * Make our N-1 descriptor pointer + * point to the N-2 since they were + * actually merged. + */ + pdesc = ppdesc; + + /* + * Rule out the case where we don't have + * pstride computed yet (our second sg + * element) + * + * We also want to catch the case where there + * would be a negative stride, + */ + } else if (pstride || + sg_dma_address(sg) < sg_dma_address(psg)) { + /* + * Queue the N-1 descriptor after the + * N-2 + */ + at_xdmac_queue_desc(chan, ppdesc, pdesc); + + /* + * Add the N-1 descriptor to the list + * of the descriptors used for this + * transfer + */ + list_add_tail(&desc->desc_node, + &first->descs_list); + dev_dbg(chan2dev(chan), + "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, desc, first); + } + } + + /* + * If we are the last element, just see if we have the + * same size than the previous element. + * + * If so, we can merge it with the previous descriptor + * since we don't care about the stride anymore. + */ + if ((i == (sg_len - 1)) && + sg_dma_len(ppsg) == sg_dma_len(psg)) { + dev_dbg(chan2dev(chan), + "%s: desc 0x%p can be merged with desc 0x%p\n", + __func__, desc, pdesc); + + /* + * Increment the block count of the N-1 + * descriptor + */ + at_xdmac_increment_block_count(chan, pdesc); + pdesc->lld.mbr_dus = stride; + + /* + * Put back the N descriptor in the free + * descriptor list + */ + list_add_tail(&desc->desc_node, + &atchan->free_descs_list); + } + + /* Update our descriptors */ + ppdesc = pdesc; + pdesc = desc; + + /* Update our scatter pointers */ + ppsg = psg; + psg = sg; + + len += sg_dma_len(sg); + } + + first->tx_dma_desc.cookie = -EBUSY; + first->tx_dma_desc.flags = flags; + first->xfer_size = len; + + return &first->tx_dma_desc; +} + static enum dma_status at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, struct dma_tx_state *txstate) @@ -1736,6 +1895,7 @@ static int at_xdmac_probe(struct platform_device *pdev) dma_cap_set(DMA_INTERLEAVE, atxdmac->dma.cap_mask); dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask); dma_cap_set(DMA_MEMSET, atxdmac->dma.cap_mask); + dma_cap_set(DMA_MEMSET_SG, atxdmac->dma.cap_mask); dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask); /* * Without DMA_PRIVATE the driver is not able to allocate more than @@ -1751,6 +1911,7 @@ static int at_xdmac_probe(struct platform_device *pdev) atxdmac->dma.device_prep_interleaved_dma = at_xdmac_prep_interleaved; atxdmac->dma.device_prep_dma_memcpy = at_xdmac_prep_dma_memcpy; atxdmac->dma.device_prep_dma_memset = at_xdmac_prep_dma_memset; + atxdmac->dma.device_prep_dma_memset_sg = at_xdmac_prep_dma_memset_sg; atxdmac->dma.device_prep_slave_sg = at_xdmac_prep_slave_sg; atxdmac->dma.device_config = at_xdmac_device_config; atxdmac->dma.device_pause = at_xdmac_device_pause; diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index fd22dd36985f..c340ca9bd2b5 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -2730,7 +2730,7 @@ static int __init coh901318_probe(struct platform_device *pdev) * This controller can only access address at even 32bit boundaries, * i.e. 2^2 */ - base->dma_memcpy.copy_align = 2; + base->dma_memcpy.copy_align = DMAENGINE_ALIGN_4_BYTES; err = dma_async_device_register(&base->dma_memcpy); if (err) diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c new file mode 100644 index 000000000000..5b2395e7e04d --- /dev/null +++ b/drivers/dma/dma-axi-dmac.c @@ -0,0 +1,691 @@ +/* + * Driver for the Analog Devices AXI-DMAC core + * + * Copyright 2013-2015 Analog Devices Inc. + * Author: Lars-Peter Clausen <lars@metafoo.de> + * + * Licensed under the GPL-2. + */ + +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/dmaengine.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_dma.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include <dt-bindings/dma/axi-dmac.h> + +#include "dmaengine.h" +#include "virt-dma.h" + +/* + * The AXI-DMAC is a soft IP core that is used in FPGA designs. The core has + * various instantiation parameters which decided the exact feature set support + * by the core. + * + * Each channel of the core has a source interface and a destination interface. + * The number of channels and the type of the channel interfaces is selected at + * configuration time. A interface can either be a connected to a central memory + * interconnect, which allows access to system memory, or it can be connected to + * a dedicated bus which is directly connected to a data port on a peripheral. + * Given that those are configuration options of the core that are selected when + * it is instantiated this means that they can not be changed by software at + * runtime. By extension this means that each channel is uni-directional. It can + * either be device to memory or memory to device, but not both. Also since the + * device side is a dedicated data bus only connected to a single peripheral + * there is no address than can or needs to be configured for the device side. + */ + +#define AXI_DMAC_REG_IRQ_MASK 0x80 +#define AXI_DMAC_REG_IRQ_PENDING 0x84 +#define AXI_DMAC_REG_IRQ_SOURCE 0x88 + +#define AXI_DMAC_REG_CTRL 0x400 +#define AXI_DMAC_REG_TRANSFER_ID 0x404 +#define AXI_DMAC_REG_START_TRANSFER 0x408 +#define AXI_DMAC_REG_FLAGS 0x40c +#define AXI_DMAC_REG_DEST_ADDRESS 0x410 +#define AXI_DMAC_REG_SRC_ADDRESS 0x414 +#define AXI_DMAC_REG_X_LENGTH 0x418 +#define AXI_DMAC_REG_Y_LENGTH 0x41c +#define AXI_DMAC_REG_DEST_STRIDE 0x420 +#define AXI_DMAC_REG_SRC_STRIDE 0x424 +#define AXI_DMAC_REG_TRANSFER_DONE 0x428 +#define AXI_DMAC_REG_ACTIVE_TRANSFER_ID 0x42c +#define AXI_DMAC_REG_STATUS 0x430 +#define AXI_DMAC_REG_CURRENT_SRC_ADDR 0x434 +#define AXI_DMAC_REG_CURRENT_DEST_ADDR 0x438 + +#define AXI_DMAC_CTRL_ENABLE BIT(0) +#define AXI_DMAC_CTRL_PAUSE BIT(1) + +#define AXI_DMAC_IRQ_SOT BIT(0) +#define AXI_DMAC_IRQ_EOT BIT(1) + +#define AXI_DMAC_FLAG_CYCLIC BIT(0) + +struct axi_dmac_sg { + dma_addr_t src_addr; + dma_addr_t dest_addr; + unsigned int x_len; + unsigned int y_len; + unsigned int dest_stride; + unsigned int src_stride; + unsigned int id; +}; + +struct axi_dmac_desc { + struct virt_dma_desc vdesc; + bool cyclic; + + unsigned int num_submitted; + unsigned int num_completed; + unsigned int num_sgs; + struct axi_dmac_sg sg[]; +}; + +struct axi_dmac_chan { + struct virt_dma_chan vchan; + + struct axi_dmac_desc *next_desc; + struct list_head active_descs; + enum dma_transfer_direction direction; + + unsigned int src_width; + unsigned int dest_width; + unsigned int src_type; + unsigned int dest_type; + + unsigned int max_length; + unsigned int align_mask; + + bool hw_cyclic; + bool hw_2d; +}; + +struct axi_dmac { + void __iomem *base; + int irq; + + struct clk *clk; + + struct dma_device dma_dev; + struct axi_dmac_chan chan; + + struct device_dma_parameters dma_parms; +}; + +static struct axi_dmac *chan_to_axi_dmac(struct axi_dmac_chan *chan) +{ + return container_of(chan->vchan.chan.device, struct axi_dmac, + dma_dev); +} + +static struct axi_dmac_chan *to_axi_dmac_chan(struct dma_chan *c) +{ + return container_of(c, struct axi_dmac_chan, vchan.chan); +} + +static struct axi_dmac_desc *to_axi_dmac_desc(struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct axi_dmac_desc, vdesc); +} + +static void axi_dmac_write(struct axi_dmac *axi_dmac, unsigned int reg, + unsigned int val) +{ + writel(val, axi_dmac->base + reg); +} + +static int axi_dmac_read(struct axi_dmac *axi_dmac, unsigned int reg) +{ + return readl(axi_dmac->base + reg); +} + +static int axi_dmac_src_is_mem(struct axi_dmac_chan *chan) +{ + return chan->src_type == AXI_DMAC_BUS_TYPE_AXI_MM; +} + +static int axi_dmac_dest_is_mem(struct axi_dmac_chan *chan) +{ + return chan->dest_type == AXI_DMAC_BUS_TYPE_AXI_MM; +} + +static bool axi_dmac_check_len(struct axi_dmac_chan *chan, unsigned int len) +{ + if (len == 0 || len > chan->max_length) + return false; + if ((len & chan->align_mask) != 0) /* Not aligned */ + return false; + return true; +} + +static bool axi_dmac_check_addr(struct axi_dmac_chan *chan, dma_addr_t addr) +{ + if ((addr & chan->align_mask) != 0) /* Not aligned */ + return false; + return true; +} + +static void axi_dmac_start_transfer(struct axi_dmac_chan *chan) +{ + struct axi_dmac *dmac = chan_to_axi_dmac(chan); + struct virt_dma_desc *vdesc; + struct axi_dmac_desc *desc; + struct axi_dmac_sg *sg; + unsigned int flags = 0; + unsigned int val; + + val = axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER); + if (val) /* Queue is full, wait for the next SOT IRQ */ + return; + + desc = chan->next_desc; + + if (!desc) { + vdesc = vchan_next_desc(&chan->vchan); + if (!vdesc) + return; + list_move_tail(&vdesc->node, &chan->active_descs); + desc = to_axi_dmac_desc(vdesc); + } + sg = &desc->sg[desc->num_submitted]; + + desc->num_submitted++; + if (desc->num_submitted == desc->num_sgs) + chan->next_desc = NULL; + else + chan->next_desc = desc; + + sg->id = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID); + + if (axi_dmac_dest_is_mem(chan)) { + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->dest_addr); + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->dest_stride); + } + + if (axi_dmac_src_is_mem(chan)) { + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->src_addr); + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->src_stride); + } + + /* + * If the hardware supports cyclic transfers and there is no callback to + * call, enable hw cyclic mode to avoid unnecessary interrupts. + */ + if (chan->hw_cyclic && desc->cyclic && !desc->vdesc.tx.callback) + flags |= AXI_DMAC_FLAG_CYCLIC; + + axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->x_len - 1); + axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->y_len - 1); + axi_dmac_write(dmac, AXI_DMAC_REG_FLAGS, flags); + axi_dmac_write(dmac, AXI_DMAC_REG_START_TRANSFER, 1); +} + +static struct axi_dmac_desc *axi_dmac_active_desc(struct axi_dmac_chan *chan) +{ + return list_first_entry_or_null(&chan->active_descs, + struct axi_dmac_desc, vdesc.node); +} + +static void axi_dmac_transfer_done(struct axi_dmac_chan *chan, + unsigned int completed_transfers) +{ + struct axi_dmac_desc *active; + struct axi_dmac_sg *sg; + + active = axi_dmac_active_desc(chan); + if (!active) + return; + + if (active->cyclic) { + vchan_cyclic_callback(&active->vdesc); + } else { + do { + sg = &active->sg[active->num_completed]; + if (!(BIT(sg->id) & completed_transfers)) + break; + active->num_completed++; + if (active->num_completed == active->num_sgs) { + list_del(&active->vdesc.node); + vchan_cookie_complete(&active->vdesc); + active = axi_dmac_active_desc(chan); + } + } while (active); + } +} + +static irqreturn_t axi_dmac_interrupt_handler(int irq, void *devid) +{ + struct axi_dmac *dmac = devid; + unsigned int pending; + + pending = axi_dmac_read(dmac, AXI_DMAC_REG_IRQ_PENDING); + axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_PENDING, pending); + + spin_lock(&dmac->chan.vchan.lock); + /* One or more transfers have finished */ + if (pending & AXI_DMAC_IRQ_EOT) { + unsigned int completed; + + completed = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_DONE); + axi_dmac_transfer_done(&dmac->chan, completed); + } + /* Space has become available in the descriptor queue */ + if (pending & AXI_DMAC_IRQ_SOT) + axi_dmac_start_transfer(&dmac->chan); + spin_unlock(&dmac->chan.vchan.lock); + + return IRQ_HANDLED; +} + +static int axi_dmac_terminate_all(struct dma_chan *c) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac *dmac = chan_to_axi_dmac(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->vchan.lock, flags); + axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, 0); + chan->next_desc = NULL; + vchan_get_all_descriptors(&chan->vchan, &head); + list_splice_tail_init(&chan->active_descs, &head); + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + vchan_dma_desc_free_list(&chan->vchan, &head); + + return 0; +} + +static void axi_dmac_issue_pending(struct dma_chan *c) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac *dmac = chan_to_axi_dmac(chan); + unsigned long flags; + + axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, AXI_DMAC_CTRL_ENABLE); + + spin_lock_irqsave(&chan->vchan.lock, flags); + if (vchan_issue_pending(&chan->vchan)) + axi_dmac_start_transfer(chan); + spin_unlock_irqrestore(&chan->vchan.lock, flags); +} + +static struct axi_dmac_desc *axi_dmac_alloc_desc(unsigned int num_sgs) +{ + struct axi_dmac_desc *desc; + + desc = kzalloc(sizeof(struct axi_dmac_desc) + + sizeof(struct axi_dmac_sg) * num_sgs, GFP_NOWAIT); + if (!desc) + return NULL; + + desc->num_sgs = num_sgs; + + return desc; +} + +static struct dma_async_tx_descriptor *axi_dmac_prep_slave_sg( + struct dma_chan *c, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac_desc *desc; + struct scatterlist *sg; + unsigned int i; + + if (direction != chan->direction) + return NULL; + + desc = axi_dmac_alloc_desc(sg_len); + if (!desc) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + if (!axi_dmac_check_addr(chan, sg_dma_address(sg)) || + !axi_dmac_check_len(chan, sg_dma_len(sg))) { + kfree(desc); + return NULL; + } + + if (direction == DMA_DEV_TO_MEM) + desc->sg[i].dest_addr = sg_dma_address(sg); + else + desc->sg[i].src_addr = sg_dma_address(sg); + desc->sg[i].x_len = sg_dma_len(sg); + desc->sg[i].y_len = 1; + } + + desc->cyclic = false; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static struct dma_async_tx_descriptor *axi_dmac_prep_dma_cyclic( + struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac_desc *desc; + unsigned int num_periods, i; + + if (direction != chan->direction) + return NULL; + + if (!axi_dmac_check_len(chan, buf_len) || + !axi_dmac_check_addr(chan, buf_addr)) + return NULL; + + if (period_len == 0 || buf_len % period_len) + return NULL; + + num_periods = buf_len / period_len; + + desc = axi_dmac_alloc_desc(num_periods); + if (!desc) + return NULL; + + for (i = 0; i < num_periods; i++) { + if (direction == DMA_DEV_TO_MEM) + desc->sg[i].dest_addr = buf_addr; + else + desc->sg[i].src_addr = buf_addr; + desc->sg[i].x_len = period_len; + desc->sg[i].y_len = 1; + buf_addr += period_len; + } + + desc->cyclic = true; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static struct dma_async_tx_descriptor *axi_dmac_prep_interleaved( + struct dma_chan *c, struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac_desc *desc; + size_t dst_icg, src_icg; + + if (xt->frame_size != 1) + return NULL; + + if (xt->dir != chan->direction) + return NULL; + + if (axi_dmac_src_is_mem(chan)) { + if (!xt->src_inc || !axi_dmac_check_addr(chan, xt->src_start)) + return NULL; + } + + if (axi_dmac_dest_is_mem(chan)) { + if (!xt->dst_inc || !axi_dmac_check_addr(chan, xt->dst_start)) + return NULL; + } + + dst_icg = dmaengine_get_dst_icg(xt, &xt->sgl[0]); + src_icg = dmaengine_get_src_icg(xt, &xt->sgl[0]); + + if (chan->hw_2d) { + if (!axi_dmac_check_len(chan, xt->sgl[0].size) || + !axi_dmac_check_len(chan, xt->numf)) + return NULL; + if (xt->sgl[0].size + dst_icg > chan->max_length || + xt->sgl[0].size + src_icg > chan->max_length) + return NULL; + } else { + if (dst_icg != 0 || src_icg != 0) + return NULL; + if (chan->max_length / xt->sgl[0].size < xt->numf) + return NULL; + if (!axi_dmac_check_len(chan, xt->sgl[0].size * xt->numf)) + return NULL; + } + + desc = axi_dmac_alloc_desc(1); + if (!desc) + return NULL; + + if (axi_dmac_src_is_mem(chan)) { + desc->sg[0].src_addr = xt->src_start; + desc->sg[0].src_stride = xt->sgl[0].size + src_icg; + } + + if (axi_dmac_dest_is_mem(chan)) { + desc->sg[0].dest_addr = xt->dst_start; + desc->sg[0].dest_stride = xt->sgl[0].size + dst_icg; + } + + if (chan->hw_2d) { + desc->sg[0].x_len = xt->sgl[0].size; + desc->sg[0].y_len = xt->numf; + } else { + desc->sg[0].x_len = xt->sgl[0].size * xt->numf; + desc->sg[0].y_len = 1; + } + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static void axi_dmac_free_chan_resources(struct dma_chan *c) +{ + vchan_free_chan_resources(to_virt_chan(c)); +} + +static void axi_dmac_desc_free(struct virt_dma_desc *vdesc) +{ + kfree(container_of(vdesc, struct axi_dmac_desc, vdesc)); +} + +/* + * The configuration stored in the devicetree matches the configuration + * parameters of the peripheral instance and allows the driver to know which + * features are implemented and how it should behave. + */ +static int axi_dmac_parse_chan_dt(struct device_node *of_chan, + struct axi_dmac_chan *chan) +{ + u32 val; + int ret; + + ret = of_property_read_u32(of_chan, "reg", &val); + if (ret) + return ret; + + /* We only support 1 channel for now */ + if (val != 0) + return -EINVAL; + + ret = of_property_read_u32(of_chan, "adi,source-bus-type", &val); + if (ret) + return ret; + if (val > AXI_DMAC_BUS_TYPE_FIFO) + return -EINVAL; + chan->src_type = val; + + ret = of_property_read_u32(of_chan, "adi,destination-bus-type", &val); + if (ret) + return ret; + if (val > AXI_DMAC_BUS_TYPE_FIFO) + return -EINVAL; + chan->dest_type = val; + + ret = of_property_read_u32(of_chan, "adi,source-bus-width", &val); + if (ret) + return ret; + chan->src_width = val / 8; + + ret = of_property_read_u32(of_chan, "adi,destination-bus-width", &val); + if (ret) + return ret; + chan->dest_width = val / 8; + + ret = of_property_read_u32(of_chan, "adi,length-width", &val); + if (ret) + return ret; + + if (val >= 32) + chan->max_length = UINT_MAX; + else + chan->max_length = (1ULL << val) - 1; + + chan->align_mask = max(chan->dest_width, chan->src_width) - 1; + + if (axi_dmac_dest_is_mem(chan) && axi_dmac_src_is_mem(chan)) + chan->direction = DMA_MEM_TO_MEM; + else if (!axi_dmac_dest_is_mem(chan) && axi_dmac_src_is_mem(chan)) + chan->direction = DMA_MEM_TO_DEV; + else if (axi_dmac_dest_is_mem(chan) && !axi_dmac_src_is_mem(chan)) + chan->direction = DMA_DEV_TO_MEM; + else + chan->direction = DMA_DEV_TO_DEV; + + chan->hw_cyclic = of_property_read_bool(of_chan, "adi,cyclic"); + chan->hw_2d = of_property_read_bool(of_chan, "adi,2d"); + + return 0; +} + +static int axi_dmac_probe(struct platform_device *pdev) +{ + struct device_node *of_channels, *of_chan; + struct dma_device *dma_dev; + struct axi_dmac *dmac; + struct resource *res; + int ret; + + dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL); + if (!dmac) + return -ENOMEM; + + dmac->irq = platform_get_irq(pdev, 0); + if (dmac->irq <= 0) + return -EINVAL; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + dmac->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(dmac->base)) + return PTR_ERR(dmac->base); + + dmac->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(dmac->clk)) + return PTR_ERR(dmac->clk); + + INIT_LIST_HEAD(&dmac->chan.active_descs); + + of_channels = of_get_child_by_name(pdev->dev.of_node, "adi,channels"); + if (of_channels == NULL) + return -ENODEV; + + for_each_child_of_node(of_channels, of_chan) { + ret = axi_dmac_parse_chan_dt(of_chan, &dmac->chan); + if (ret) { + of_node_put(of_chan); + of_node_put(of_channels); + return -EINVAL; + } + } + of_node_put(of_channels); + + pdev->dev.dma_parms = &dmac->dma_parms; + dma_set_max_seg_size(&pdev->dev, dmac->chan.max_length); + + dma_dev = &dmac->dma_dev; + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask); + dma_dev->device_free_chan_resources = axi_dmac_free_chan_resources; + dma_dev->device_tx_status = dma_cookie_status; + dma_dev->device_issue_pending = axi_dmac_issue_pending; + dma_dev->device_prep_slave_sg = axi_dmac_prep_slave_sg; + dma_dev->device_prep_dma_cyclic = axi_dmac_prep_dma_cyclic; + dma_dev->device_prep_interleaved_dma = axi_dmac_prep_interleaved; + dma_dev->device_terminate_all = axi_dmac_terminate_all; + dma_dev->dev = &pdev->dev; + dma_dev->chancnt = 1; + dma_dev->src_addr_widths = BIT(dmac->chan.src_width); + dma_dev->dst_addr_widths = BIT(dmac->chan.dest_width); + dma_dev->directions = BIT(dmac->chan.direction); + dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + INIT_LIST_HEAD(&dma_dev->channels); + + dmac->chan.vchan.desc_free = axi_dmac_desc_free; + vchan_init(&dmac->chan.vchan, dma_dev); + + ret = clk_prepare_enable(dmac->clk); + if (ret < 0) + return ret; + + axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, 0x00); + + ret = dma_async_device_register(dma_dev); + if (ret) + goto err_clk_disable; + + ret = of_dma_controller_register(pdev->dev.of_node, + of_dma_xlate_by_chan_id, dma_dev); + if (ret) + goto err_unregister_device; + + ret = request_irq(dmac->irq, axi_dmac_interrupt_handler, 0, + dev_name(&pdev->dev), dmac); + if (ret) + goto err_unregister_of; + + platform_set_drvdata(pdev, dmac); + + return 0; + +err_unregister_of: + of_dma_controller_free(pdev->dev.of_node); +err_unregister_device: + dma_async_device_unregister(&dmac->dma_dev); +err_clk_disable: + clk_disable_unprepare(dmac->clk); + + return ret; +} + +static int axi_dmac_remove(struct platform_device *pdev) +{ + struct axi_dmac *dmac = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + free_irq(dmac->irq, dmac); + tasklet_kill(&dmac->chan.vchan.task); + dma_async_device_unregister(&dmac->dma_dev); + clk_disable_unprepare(dmac->clk); + + return 0; +} + +static const struct of_device_id axi_dmac_of_match_table[] = { + { .compatible = "adi,axi-dmac-1.00.a" }, + { }, +}; + +static struct platform_driver axi_dmac_driver = { + .driver = { + .name = "dma-axi-dmac", + .of_match_table = axi_dmac_of_match_table, + }, + .probe = axi_dmac_probe, + .remove = axi_dmac_remove, +}; +module_platform_driver(axi_dmac_driver); + +MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>"); +MODULE_DESCRIPTION("DMA controller driver for the AXI-DMAC controller"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index 26d2f0e09ea3..dade7c47ff18 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -145,7 +145,8 @@ struct jz4780_dma_dev { struct jz4780_dma_chan chan[JZ_DMA_NR_CHANNELS]; }; -struct jz4780_dma_data { +struct jz4780_dma_filter_data { + struct device_node *of_node; uint32_t transfer_type; int channel; }; @@ -214,11 +215,25 @@ static void jz4780_dma_desc_free(struct virt_dma_desc *vdesc) kfree(desc); } -static uint32_t jz4780_dma_transfer_size(unsigned long val, int *ord) +static uint32_t jz4780_dma_transfer_size(unsigned long val, uint32_t *shift) { - *ord = ffs(val) - 1; + int ord = ffs(val) - 1; - switch (*ord) { + /* + * 8 byte transfer sizes unsupported so fall back on 4. If it's larger + * than the maximum, just limit it. It is perfectly safe to fall back + * in this way since we won't exceed the maximum burst size supported + * by the device, the only effect is reduced efficiency. This is better + * than refusing to perform the request at all. + */ + if (ord == 3) + ord = 2; + else if (ord > 7) + ord = 7; + + *shift = ord; + + switch (ord) { case 0: return JZ_DMA_SIZE_1_BYTE; case 1: @@ -231,20 +246,17 @@ static uint32_t jz4780_dma_transfer_size(unsigned long val, int *ord) return JZ_DMA_SIZE_32_BYTE; case 6: return JZ_DMA_SIZE_64_BYTE; - case 7: - return JZ_DMA_SIZE_128_BYTE; default: - return -EINVAL; + return JZ_DMA_SIZE_128_BYTE; } } -static uint32_t jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, +static int jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, struct jz4780_dma_hwdesc *desc, dma_addr_t addr, size_t len, enum dma_transfer_direction direction) { struct dma_slave_config *config = &jzchan->config; uint32_t width, maxburst, tsz; - int ord; if (direction == DMA_MEM_TO_DEV) { desc->dcm = JZ_DMA_DCM_SAI; @@ -271,8 +283,8 @@ static uint32_t jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, * divisible by the transfer size, and we must not use more than the * maximum burst specified by the user. */ - tsz = jz4780_dma_transfer_size(addr | len | (width * maxburst), &ord); - jzchan->transfer_shift = ord; + tsz = jz4780_dma_transfer_size(addr | len | (width * maxburst), + &jzchan->transfer_shift); switch (width) { case DMA_SLAVE_BUSWIDTH_1_BYTE: @@ -289,12 +301,14 @@ static uint32_t jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, desc->dcm |= width << JZ_DMA_DCM_SP_SHIFT; desc->dcm |= width << JZ_DMA_DCM_DP_SHIFT; - desc->dtc = len >> ord; + desc->dtc = len >> jzchan->transfer_shift; + return 0; } static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg( struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, - enum dma_transfer_direction direction, unsigned long flags) + enum dma_transfer_direction direction, unsigned long flags, + void *context) { struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_desc *desc; @@ -307,12 +321,11 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg( for (i = 0; i < sg_len; i++) { err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i], - sg_dma_address(&sgl[i]), - sg_dma_len(&sgl[i]), - direction); + sg_dma_address(&sgl[i]), + sg_dma_len(&sgl[i]), + direction); if (err < 0) - return ERR_PTR(err); - + return NULL; desc->desc[i].dcm |= JZ_DMA_DCM_TIE; @@ -354,9 +367,9 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_cyclic( for (i = 0; i < periods; i++) { err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i], buf_addr, - period_len, direction); + period_len, direction); if (err < 0) - return ERR_PTR(err); + return NULL; buf_addr += period_len; @@ -390,15 +403,13 @@ struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy( struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_desc *desc; uint32_t tsz; - int ord; desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY); if (!desc) return NULL; - tsz = jz4780_dma_transfer_size(dest | src | len, &ord); - if (tsz < 0) - return ERR_PTR(tsz); + tsz = jz4780_dma_transfer_size(dest | src | len, + &jzchan->transfer_shift); desc->desc[0].dsa = src; desc->desc[0].dta = dest; @@ -407,7 +418,7 @@ struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy( tsz << JZ_DMA_DCM_TSZ_SHIFT | JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_SP_SHIFT | JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_DP_SHIFT; - desc->desc[0].dtc = len >> ord; + desc->desc[0].dtc = len >> jzchan->transfer_shift; return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags); } @@ -484,8 +495,9 @@ static void jz4780_dma_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&jzchan->vchan.lock, flags); } -static int jz4780_dma_terminate_all(struct jz4780_dma_chan *jzchan) +static int jz4780_dma_terminate_all(struct dma_chan *chan) { + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); unsigned long flags; LIST_HEAD(head); @@ -507,9 +519,11 @@ static int jz4780_dma_terminate_all(struct jz4780_dma_chan *jzchan) return 0; } -static int jz4780_dma_slave_config(struct jz4780_dma_chan *jzchan, - const struct dma_slave_config *config) +static int jz4780_dma_config(struct dma_chan *chan, + struct dma_slave_config *config) { + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + if ((config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES) || (config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)) return -EINVAL; @@ -567,8 +581,8 @@ static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan, txstate->residue = 0; if (vdesc && jzchan->desc && vdesc == &jzchan->desc->vdesc - && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) - status = DMA_ERROR; + && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) + status = DMA_ERROR; spin_unlock_irqrestore(&jzchan->vchan.lock, flags); return status; @@ -671,7 +685,10 @@ static bool jz4780_dma_filter_fn(struct dma_chan *chan, void *param) { struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); - struct jz4780_dma_data *data = param; + struct jz4780_dma_filter_data *data = param; + + if (jzdma->dma_device.dev->of_node != data->of_node) + return false; if (data->channel > -1) { if (data->channel != jzchan->id) @@ -690,11 +707,12 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec, { struct jz4780_dma_dev *jzdma = ofdma->of_dma_data; dma_cap_mask_t mask = jzdma->dma_device.cap_mask; - struct jz4780_dma_data data; + struct jz4780_dma_filter_data data; if (dma_spec->args_count != 2) return NULL; + data.of_node = ofdma->of_node; data.transfer_type = dma_spec->args[0]; data.channel = dma_spec->args[1]; @@ -713,9 +731,14 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec, data.channel); return NULL; } - } - return dma_request_channel(mask, jz4780_dma_filter_fn, &data); + jzdma->chan[data.channel].transfer_type = data.transfer_type; + + return dma_get_slave_channel( + &jzdma->chan[data.channel].vchan.chan); + } else { + return dma_request_channel(mask, jz4780_dma_filter_fn, &data); + } } static int jz4780_dma_probe(struct platform_device *pdev) @@ -743,23 +766,26 @@ static int jz4780_dma_probe(struct platform_device *pdev) if (IS_ERR(jzdma->base)) return PTR_ERR(jzdma->base); - jzdma->irq = platform_get_irq(pdev, 0); - if (jzdma->irq < 0) { + ret = platform_get_irq(pdev, 0); + if (ret < 0) { dev_err(dev, "failed to get IRQ: %d\n", ret); - return jzdma->irq; + return ret; } - ret = devm_request_irq(dev, jzdma->irq, jz4780_dma_irq_handler, 0, - dev_name(dev), jzdma); + jzdma->irq = ret; + + ret = request_irq(jzdma->irq, jz4780_dma_irq_handler, 0, dev_name(dev), + jzdma); if (ret) { dev_err(dev, "failed to request IRQ %u!\n", jzdma->irq); - return -EINVAL; + return ret; } jzdma->clk = devm_clk_get(dev, NULL); if (IS_ERR(jzdma->clk)) { dev_err(dev, "failed to get clock\n"); - return PTR_ERR(jzdma->clk); + ret = PTR_ERR(jzdma->clk); + goto err_free_irq; } clk_prepare_enable(jzdma->clk); @@ -775,13 +801,13 @@ static int jz4780_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_CYCLIC, dd->cap_mask); dd->dev = dev; - dd->copy_align = 2; /* 2^2 = 4 byte alignment */ + dd->copy_align = DMAENGINE_ALIGN_4_BYTES; dd->device_alloc_chan_resources = jz4780_dma_alloc_chan_resources; dd->device_free_chan_resources = jz4780_dma_free_chan_resources; dd->device_prep_slave_sg = jz4780_dma_prep_slave_sg; dd->device_prep_dma_cyclic = jz4780_dma_prep_dma_cyclic; dd->device_prep_dma_memcpy = jz4780_dma_prep_dma_memcpy; - dd->device_config = jz4780_dma_slave_config; + dd->device_config = jz4780_dma_config; dd->device_terminate_all = jz4780_dma_terminate_all; dd->device_tx_status = jz4780_dma_tx_status; dd->device_issue_pending = jz4780_dma_issue_pending; @@ -790,7 +816,6 @@ static int jz4780_dma_probe(struct platform_device *pdev) dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; - /* * Enable DMA controller, mark all channels as not programmable. * Also set the FMSC bit - it increases MSC performance, so it makes @@ -832,15 +857,24 @@ err_unregister_dev: err_disable_clk: clk_disable_unprepare(jzdma->clk); + +err_free_irq: + free_irq(jzdma->irq, jzdma); return ret; } static int jz4780_dma_remove(struct platform_device *pdev) { struct jz4780_dma_dev *jzdma = platform_get_drvdata(pdev); + int i; of_dma_controller_free(pdev->dev.of_node); - devm_free_irq(&pdev->dev, jzdma->irq, jzdma); + + free_irq(jzdma->irq, jzdma); + + for (i = 0; i < JZ_DMA_NR_CHANNELS; i++) + tasklet_kill(&jzdma->chan[i].vchan.task); + dma_async_device_unregister(&jzdma->dma_device); return 0; } diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig index 36e02f0f645e..e00c9b022964 100644 --- a/drivers/dma/dw/Kconfig +++ b/drivers/dma/dw/Kconfig @@ -6,6 +6,9 @@ config DW_DMAC_CORE tristate select DMA_ENGINE +config DW_DMAC_BIG_ENDIAN_IO + bool + config DW_DMAC tristate "Synopsys DesignWare AHB DMA platform driver" select DW_DMAC_CORE @@ -23,6 +26,3 @@ config DW_DMAC_PCI Support the Synopsys DesignWare AHB DMA controller on the platfroms that enumerate it as a PCI device. For example, Intel Medfield has integrated this GPDMA controller. - -config DW_DMAC_BIG_ENDIAN_IO - bool diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 88853af69489..3e5d4f193005 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1000,7 +1000,7 @@ static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma, * code using dma memcpy must make sure alignment of * length is at dma->copy_align boundary. */ - dma->copy_align = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma->copy_align = DMAENGINE_ALIGN_4_BYTES; INIT_LIST_HEAD(&dma->channels); } diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c index f42f71e37e73..7669c7dd1e34 100644 --- a/drivers/dma/hsu/hsu.c +++ b/drivers/dma/hsu/hsu.c @@ -99,21 +99,13 @@ static void hsu_dma_chan_start(struct hsu_dma_chan *hsuc) static void hsu_dma_stop_channel(struct hsu_dma_chan *hsuc) { - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); hsu_chan_disable(hsuc); hsu_chan_writel(hsuc, HSU_CH_DCR, 0); - spin_unlock_irqrestore(&hsuc->lock, flags); } static void hsu_dma_start_channel(struct hsu_dma_chan *hsuc) { - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); hsu_dma_chan_start(hsuc); - spin_unlock_irqrestore(&hsuc->lock, flags); } static void hsu_dma_start_transfer(struct hsu_dma_chan *hsuc) @@ -139,9 +131,9 @@ static u32 hsu_dma_chan_get_sr(struct hsu_dma_chan *hsuc) unsigned long flags; u32 sr; - spin_lock_irqsave(&hsuc->lock, flags); + spin_lock_irqsave(&hsuc->vchan.lock, flags); sr = hsu_chan_readl(hsuc, HSU_CH_SR); - spin_unlock_irqrestore(&hsuc->lock, flags); + spin_unlock_irqrestore(&hsuc->vchan.lock, flags); return sr; } @@ -273,14 +265,11 @@ static size_t hsu_dma_active_desc_size(struct hsu_dma_chan *hsuc) struct hsu_dma_desc *desc = hsuc->desc; size_t bytes = hsu_dma_desc_size(desc); int i; - unsigned long flags; - spin_lock_irqsave(&hsuc->lock, flags); i = desc->active % HSU_DMA_CHAN_NR_DESC; do { bytes += hsu_chan_readl(hsuc, HSU_CH_DxTSR(i)); } while (--i >= 0); - spin_unlock_irqrestore(&hsuc->lock, flags); return bytes; } @@ -327,24 +316,6 @@ static int hsu_dma_slave_config(struct dma_chan *chan, return 0; } -static void hsu_dma_chan_deactivate(struct hsu_dma_chan *hsuc) -{ - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); - hsu_chan_disable(hsuc); - spin_unlock_irqrestore(&hsuc->lock, flags); -} - -static void hsu_dma_chan_activate(struct hsu_dma_chan *hsuc) -{ - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); - hsu_chan_enable(hsuc); - spin_unlock_irqrestore(&hsuc->lock, flags); -} - static int hsu_dma_pause(struct dma_chan *chan) { struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan); @@ -352,7 +323,7 @@ static int hsu_dma_pause(struct dma_chan *chan) spin_lock_irqsave(&hsuc->vchan.lock, flags); if (hsuc->desc && hsuc->desc->status == DMA_IN_PROGRESS) { - hsu_dma_chan_deactivate(hsuc); + hsu_chan_disable(hsuc); hsuc->desc->status = DMA_PAUSED; } spin_unlock_irqrestore(&hsuc->vchan.lock, flags); @@ -368,7 +339,7 @@ static int hsu_dma_resume(struct dma_chan *chan) spin_lock_irqsave(&hsuc->vchan.lock, flags); if (hsuc->desc && hsuc->desc->status == DMA_PAUSED) { hsuc->desc->status = DMA_IN_PROGRESS; - hsu_dma_chan_activate(hsuc); + hsu_chan_enable(hsuc); } spin_unlock_irqrestore(&hsuc->vchan.lock, flags); @@ -441,8 +412,6 @@ int hsu_dma_probe(struct hsu_dma_chip *chip) hsuc->direction = (i & 0x1) ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV; hsuc->reg = addr + i * HSU_DMA_CHAN_LENGTH; - - spin_lock_init(&hsuc->lock); } dma_cap_set(DMA_SLAVE, hsu->dma.cap_mask); diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h index 0275233cf550..eeb9fff66967 100644 --- a/drivers/dma/hsu/hsu.h +++ b/drivers/dma/hsu/hsu.h @@ -78,7 +78,6 @@ struct hsu_dma_chan { struct virt_dma_chan vchan; void __iomem *reg; - spinlock_t lock; /* hardware configuration */ enum dma_transfer_direction direction; diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 865501fcc67d..48d85f8b95fe 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -1083,8 +1083,12 @@ static int __init imxdma_probe(struct platform_device *pdev) if (IS_ERR(imxdma->dma_ahb)) return PTR_ERR(imxdma->dma_ahb); - clk_prepare_enable(imxdma->dma_ipg); - clk_prepare_enable(imxdma->dma_ahb); + ret = clk_prepare_enable(imxdma->dma_ipg); + if (ret) + return ret; + ret = clk_prepare_enable(imxdma->dma_ahb); + if (ret) + goto disable_dma_ipg_clk; /* reset DMA module */ imx_dmav1_writel(imxdma, DCR_DRST, DMA_DCR); @@ -1094,20 +1098,20 @@ static int __init imxdma_probe(struct platform_device *pdev) dma_irq_handler, 0, "DMA", imxdma); if (ret) { dev_warn(imxdma->dev, "Can't register IRQ for DMA\n"); - goto err; + goto disable_dma_ahb_clk; } irq_err = platform_get_irq(pdev, 1); if (irq_err < 0) { ret = irq_err; - goto err; + goto disable_dma_ahb_clk; } ret = devm_request_irq(&pdev->dev, irq_err, imxdma_err_handler, 0, "DMA", imxdma); if (ret) { dev_warn(imxdma->dev, "Can't register ERRIRQ for DMA\n"); - goto err; + goto disable_dma_ahb_clk; } } @@ -1144,7 +1148,7 @@ static int __init imxdma_probe(struct platform_device *pdev) dev_warn(imxdma->dev, "Can't register IRQ %d " "for DMA channel %d\n", irq + i, i); - goto err; + goto disable_dma_ahb_clk; } init_timer(&imxdmac->watchdog); imxdmac->watchdog.function = &imxdma_watchdog; @@ -1183,14 +1187,14 @@ static int __init imxdma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, imxdma); - imxdma->dma_device.copy_align = 2; /* 2^2 = 4 bytes alignment */ + imxdma->dma_device.copy_align = DMAENGINE_ALIGN_4_BYTES; imxdma->dma_device.dev->dma_parms = &imxdma->dma_parms; dma_set_max_seg_size(imxdma->dma_device.dev, 0xffffff); ret = dma_async_device_register(&imxdma->dma_device); if (ret) { dev_err(&pdev->dev, "unable to register\n"); - goto err; + goto disable_dma_ahb_clk; } if (pdev->dev.of_node) { @@ -1206,9 +1210,10 @@ static int __init imxdma_probe(struct platform_device *pdev) err_of_dma_controller: dma_async_device_unregister(&imxdma->dma_device); -err: - clk_disable_unprepare(imxdma->dma_ipg); +disable_dma_ahb_clk: clk_disable_unprepare(imxdma->dma_ahb); +disable_dma_ipg_clk: + clk_disable_unprepare(imxdma->dma_ipg); return ret; } diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 77b6aab04f47..9d375bc7590a 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -35,12 +35,16 @@ #include <linux/platform_device.h> #include <linux/dmaengine.h> #include <linux/of.h> +#include <linux/of_address.h> #include <linux/of_device.h> #include <linux/of_dma.h> #include <asm/irq.h> #include <linux/platform_data/dma-imx-sdma.h> #include <linux/platform_data/dma-imx.h> +#include <linux/regmap.h> +#include <linux/mfd/syscon.h> +#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h> #include "dmaengine.h" @@ -124,6 +128,56 @@ #define CHANGE_ENDIANNESS 0x80 /* + * p_2_p watermark_level description + * Bits Name Description + * 0-7 Lower WML Lower watermark level + * 8 PS 1: Pad Swallowing + * 0: No Pad Swallowing + * 9 PA 1: Pad Adding + * 0: No Pad Adding + * 10 SPDIF If this bit is set both source + * and destination are on SPBA + * 11 Source Bit(SP) 1: Source on SPBA + * 0: Source on AIPS + * 12 Destination Bit(DP) 1: Destination on SPBA + * 0: Destination on AIPS + * 13-15 --------- MUST BE 0 + * 16-23 Higher WML HWML + * 24-27 N Total number of samples after + * which Pad adding/Swallowing + * must be done. It must be odd. + * 28 Lower WML Event(LWE) SDMA events reg to check for + * LWML event mask + * 0: LWE in EVENTS register + * 1: LWE in EVENTS2 register + * 29 Higher WML Event(HWE) SDMA events reg to check for + * HWML event mask + * 0: HWE in EVENTS register + * 1: HWE in EVENTS2 register + * 30 --------- MUST BE 0 + * 31 CONT 1: Amount of samples to be + * transferred is unknown and + * script will keep on + * transferring samples as long as + * both events are detected and + * script must be manually stopped + * by the application + * 0: The amount of samples to be + * transferred is equal to the + * count field of mode word + */ +#define SDMA_WATERMARK_LEVEL_LWML 0xFF +#define SDMA_WATERMARK_LEVEL_PS BIT(8) +#define SDMA_WATERMARK_LEVEL_PA BIT(9) +#define SDMA_WATERMARK_LEVEL_SPDIF BIT(10) +#define SDMA_WATERMARK_LEVEL_SP BIT(11) +#define SDMA_WATERMARK_LEVEL_DP BIT(12) +#define SDMA_WATERMARK_LEVEL_HWML (0xFF << 16) +#define SDMA_WATERMARK_LEVEL_LWE BIT(28) +#define SDMA_WATERMARK_LEVEL_HWE BIT(29) +#define SDMA_WATERMARK_LEVEL_CONT BIT(31) + +/* * Mode/Count of data node descriptors - IPCv2 */ struct sdma_mode_count { @@ -259,8 +313,9 @@ struct sdma_channel { struct sdma_buffer_descriptor *bd; dma_addr_t bd_phys; unsigned int pc_from_device, pc_to_device; + unsigned int device_to_device; unsigned long flags; - dma_addr_t per_address; + dma_addr_t per_address, per_address2; unsigned long event_mask[2]; unsigned long watermark_level; u32 shp_addr, per_addr; @@ -328,6 +383,8 @@ struct sdma_engine { u32 script_number; struct sdma_script_start_addrs *script_addrs; const struct sdma_driver_data *drvdata; + u32 spba_start_addr; + u32 spba_end_addr; }; static struct sdma_driver_data sdma_imx31 = { @@ -705,6 +762,7 @@ static void sdma_get_pc(struct sdma_channel *sdmac, sdmac->pc_from_device = 0; sdmac->pc_to_device = 0; + sdmac->device_to_device = 0; switch (peripheral_type) { case IMX_DMATYPE_MEMORY: @@ -780,6 +838,7 @@ static void sdma_get_pc(struct sdma_channel *sdmac, sdmac->pc_from_device = per_2_emi; sdmac->pc_to_device = emi_2_per; + sdmac->device_to_device = per_2_per; } static int sdma_load_context(struct sdma_channel *sdmac) @@ -792,11 +851,12 @@ static int sdma_load_context(struct sdma_channel *sdmac) int ret; unsigned long flags; - if (sdmac->direction == DMA_DEV_TO_MEM) { + if (sdmac->direction == DMA_DEV_TO_MEM) load_address = sdmac->pc_from_device; - } else { + else if (sdmac->direction == DMA_DEV_TO_DEV) + load_address = sdmac->device_to_device; + else load_address = sdmac->pc_to_device; - } if (load_address < 0) return load_address; @@ -851,6 +911,46 @@ static int sdma_disable_channel(struct dma_chan *chan) return 0; } +static void sdma_set_watermarklevel_for_p2p(struct sdma_channel *sdmac) +{ + struct sdma_engine *sdma = sdmac->sdma; + + int lwml = sdmac->watermark_level & SDMA_WATERMARK_LEVEL_LWML; + int hwml = (sdmac->watermark_level & SDMA_WATERMARK_LEVEL_HWML) >> 16; + + set_bit(sdmac->event_id0 % 32, &sdmac->event_mask[1]); + set_bit(sdmac->event_id1 % 32, &sdmac->event_mask[0]); + + if (sdmac->event_id0 > 31) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_LWE; + + if (sdmac->event_id1 > 31) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_HWE; + + /* + * If LWML(src_maxburst) > HWML(dst_maxburst), we need + * swap LWML and HWML of INFO(A.3.2.5.1), also need swap + * r0(event_mask[1]) and r1(event_mask[0]). + */ + if (lwml > hwml) { + sdmac->watermark_level &= ~(SDMA_WATERMARK_LEVEL_LWML | + SDMA_WATERMARK_LEVEL_HWML); + sdmac->watermark_level |= hwml; + sdmac->watermark_level |= lwml << 16; + swap(sdmac->event_mask[0], sdmac->event_mask[1]); + } + + if (sdmac->per_address2 >= sdma->spba_start_addr && + sdmac->per_address2 <= sdma->spba_end_addr) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_SP; + + if (sdmac->per_address >= sdma->spba_start_addr && + sdmac->per_address <= sdma->spba_end_addr) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_DP; + + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_CONT; +} + static int sdma_config_channel(struct dma_chan *chan) { struct sdma_channel *sdmac = to_sdma_chan(chan); @@ -869,6 +969,12 @@ static int sdma_config_channel(struct dma_chan *chan) sdma_event_enable(sdmac, sdmac->event_id0); } + if (sdmac->event_id1) { + if (sdmac->event_id1 >= sdmac->sdma->drvdata->num_events) + return -EINVAL; + sdma_event_enable(sdmac, sdmac->event_id1); + } + switch (sdmac->peripheral_type) { case IMX_DMATYPE_DSP: sdma_config_ownership(sdmac, false, true, true); @@ -887,19 +993,17 @@ static int sdma_config_channel(struct dma_chan *chan) (sdmac->peripheral_type != IMX_DMATYPE_DSP)) { /* Handle multiple event channels differently */ if (sdmac->event_id1) { - sdmac->event_mask[1] = BIT(sdmac->event_id1 % 32); - if (sdmac->event_id1 > 31) - __set_bit(31, &sdmac->watermark_level); - sdmac->event_mask[0] = BIT(sdmac->event_id0 % 32); - if (sdmac->event_id0 > 31) - __set_bit(30, &sdmac->watermark_level); - } else { + if (sdmac->peripheral_type == IMX_DMATYPE_ASRC_SP || + sdmac->peripheral_type == IMX_DMATYPE_ASRC) + sdma_set_watermarklevel_for_p2p(sdmac); + } else __set_bit(sdmac->event_id0, sdmac->event_mask); - } + /* Watermark Level */ sdmac->watermark_level |= sdmac->watermark_level; /* Address */ sdmac->shp_addr = sdmac->per_address; + sdmac->per_addr = sdmac->per_address2; } else { sdmac->watermark_level = 0; /* FIXME: M3_BASE_ADDRESS */ } @@ -987,17 +1091,22 @@ static int sdma_alloc_chan_resources(struct dma_chan *chan) sdmac->peripheral_type = data->peripheral_type; sdmac->event_id0 = data->dma_request; + sdmac->event_id1 = data->dma_request2; - clk_enable(sdmac->sdma->clk_ipg); - clk_enable(sdmac->sdma->clk_ahb); + ret = clk_enable(sdmac->sdma->clk_ipg); + if (ret) + return ret; + ret = clk_enable(sdmac->sdma->clk_ahb); + if (ret) + goto disable_clk_ipg; ret = sdma_request_channel(sdmac); if (ret) - return ret; + goto disable_clk_ahb; ret = sdma_set_channel_priority(sdmac, prio); if (ret) - return ret; + goto disable_clk_ahb; dma_async_tx_descriptor_init(&sdmac->desc, chan); sdmac->desc.tx_submit = sdma_tx_submit; @@ -1005,6 +1114,12 @@ static int sdma_alloc_chan_resources(struct dma_chan *chan) sdmac->desc.flags = DMA_CTRL_ACK; return 0; + +disable_clk_ahb: + clk_disable(sdmac->sdma->clk_ahb); +disable_clk_ipg: + clk_disable(sdmac->sdma->clk_ipg); + return ret; } static void sdma_free_chan_resources(struct dma_chan *chan) @@ -1221,6 +1336,14 @@ static int sdma_config(struct dma_chan *chan, sdmac->watermark_level = dmaengine_cfg->src_maxburst * dmaengine_cfg->src_addr_width; sdmac->word_size = dmaengine_cfg->src_addr_width; + } else if (dmaengine_cfg->direction == DMA_DEV_TO_DEV) { + sdmac->per_address2 = dmaengine_cfg->src_addr; + sdmac->per_address = dmaengine_cfg->dst_addr; + sdmac->watermark_level = dmaengine_cfg->src_maxburst & + SDMA_WATERMARK_LEVEL_LWML; + sdmac->watermark_level |= (dmaengine_cfg->dst_maxburst << 16) & + SDMA_WATERMARK_LEVEL_HWML; + sdmac->word_size = dmaengine_cfg->dst_addr_width; } else { sdmac->per_address = dmaengine_cfg->dst_addr; sdmac->watermark_level = dmaengine_cfg->dst_maxburst * @@ -1337,6 +1460,72 @@ err_firmware: release_firmware(fw); } +#define EVENT_REMAP_CELLS 3 + +static int __init sdma_event_remap(struct sdma_engine *sdma) +{ + struct device_node *np = sdma->dev->of_node; + struct device_node *gpr_np = of_parse_phandle(np, "gpr", 0); + struct property *event_remap; + struct regmap *gpr; + char propname[] = "fsl,sdma-event-remap"; + u32 reg, val, shift, num_map, i; + int ret = 0; + + if (IS_ERR(np) || IS_ERR(gpr_np)) + goto out; + + event_remap = of_find_property(np, propname, NULL); + num_map = event_remap ? (event_remap->length / sizeof(u32)) : 0; + if (!num_map) { + dev_warn(sdma->dev, "no event needs to be remapped\n"); + goto out; + } else if (num_map % EVENT_REMAP_CELLS) { + dev_err(sdma->dev, "the property %s must modulo %d\n", + propname, EVENT_REMAP_CELLS); + ret = -EINVAL; + goto out; + } + + gpr = syscon_node_to_regmap(gpr_np); + if (IS_ERR(gpr)) { + dev_err(sdma->dev, "failed to get gpr regmap\n"); + ret = PTR_ERR(gpr); + goto out; + } + + for (i = 0; i < num_map; i += EVENT_REMAP_CELLS) { + ret = of_property_read_u32_index(np, propname, i, ®); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i); + goto out; + } + + ret = of_property_read_u32_index(np, propname, i + 1, &shift); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i + 1); + goto out; + } + + ret = of_property_read_u32_index(np, propname, i + 2, &val); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i + 2); + goto out; + } + + regmap_update_bits(gpr, reg, BIT(shift), val << shift); + } + +out: + if (!IS_ERR(gpr_np)) + of_node_put(gpr_np); + + return ret; +} + static int sdma_get_firmware(struct sdma_engine *sdma, const char *fw_name) { @@ -1354,8 +1543,12 @@ static int sdma_init(struct sdma_engine *sdma) int i, ret; dma_addr_t ccb_phys; - clk_enable(sdma->clk_ipg); - clk_enable(sdma->clk_ahb); + ret = clk_enable(sdma->clk_ipg); + if (ret) + return ret; + ret = clk_enable(sdma->clk_ahb); + if (ret) + goto disable_clk_ipg; /* Be sure SDMA has not started yet */ writel_relaxed(0, sdma->regs + SDMA_H_C0PTR); @@ -1411,8 +1604,9 @@ static int sdma_init(struct sdma_engine *sdma) return 0; err_dma_alloc: - clk_disable(sdma->clk_ipg); clk_disable(sdma->clk_ahb); +disable_clk_ipg: + clk_disable(sdma->clk_ipg); dev_err(sdma->dev, "initialisation failed with %d\n", ret); return ret; } @@ -1444,6 +1638,14 @@ static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec, data.dma_request = dma_spec->args[0]; data.peripheral_type = dma_spec->args[1]; data.priority = dma_spec->args[2]; + /* + * init dma_request2 to zero, which is not used by the dts. + * For P2P, dma_request2 is init from dma_request_channel(), + * chan->private will point to the imx_dma_data, and in + * device_alloc_chan_resources(), imx_dma_data.dma_request2 will + * be set to sdmac->event_id1. + */ + data.dma_request2 = 0; return dma_request_channel(mask, sdma_filter_fn, &data); } @@ -1453,10 +1655,12 @@ static int sdma_probe(struct platform_device *pdev) const struct of_device_id *of_id = of_match_device(sdma_dt_ids, &pdev->dev); struct device_node *np = pdev->dev.of_node; + struct device_node *spba_bus; const char *fw_name; int ret; int irq; struct resource *iores; + struct resource spba_res; struct sdma_platform_data *pdata = dev_get_platdata(&pdev->dev); int i; struct sdma_engine *sdma; @@ -1551,6 +1755,10 @@ static int sdma_probe(struct platform_device *pdev) if (ret) goto err_init; + ret = sdma_event_remap(sdma); + if (ret) + goto err_init; + if (sdma->drvdata->script_addrs) sdma_add_scripts(sdma, sdma->drvdata->script_addrs); if (pdata && pdata->script_addrs) @@ -1608,6 +1816,14 @@ static int sdma_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to register controller\n"); goto err_register; } + + spba_bus = of_find_compatible_node(NULL, NULL, "fsl,spba-bus"); + ret = of_address_to_resource(spba_bus, 0, &spba_res); + if (!ret) { + sdma->spba_start_addr = spba_res.start; + sdma->spba_end_addr = spba_res.end; + } + of_node_put(spba_bus); } dev_info(sdma->dev, "initialized\n"); diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile index 0ff7270af25b..cf5fedbe2b75 100644 --- a/drivers/dma/ioat/Makefile +++ b/drivers/dma/ioat/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o -ioatdma-y := pci.o dma.o dma_v2.o dma_v3.o dca.o +ioatdma-y := init.o dma.o prep.o dca.o sysfs.o diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c index ea1e107ae884..2cb7c308d5c7 100644 --- a/drivers/dma/ioat/dca.c +++ b/drivers/dma/ioat/dca.c @@ -31,7 +31,6 @@ #include "dma.h" #include "registers.h" -#include "dma_v2.h" /* * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 @@ -71,14 +70,6 @@ static inline int dca2_tag_map_valid(u8 *tag_map) #define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x)) #define IOAT_TAG_MAP_LEN 8 -static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; -static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; -static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), }; -static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 }; - /* pack PCI B/D/F into a u16 */ static inline u16 dcaid_from_pcidev(struct pci_dev *pci) { @@ -126,96 +117,6 @@ struct ioat_dca_priv { struct ioat_dca_slot req_slots[0]; }; -/* 5000 series chipset DCA Port Requester ID Table Entry Format - * [15:8] PCI-Express Bus Number - * [7:3] PCI-Express Device Number - * [2:0] PCI-Express Function Number - * - * 5000 series chipset DCA control register format - * [7:1] Reserved (0) - * [0] Ignore Function Number - */ - -static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 id; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - id = dcaid_from_pcidev(pdev); - - if (ioatdca->requester_count == ioatdca->max_requesters) - return -ENODEV; - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == NULL) { - /* found an empty slot */ - ioatdca->requester_count++; - ioatdca->req_slots[i].pdev = pdev; - ioatdca->req_slots[i].rid = id; - writew(id, ioatdca->dca_base + (i * 4)); - /* make sure the ignore function bit is off */ - writeb(0, ioatdca->dca_base + (i * 4) + 2); - return i; - } - } - /* Error, ioatdma->requester_count is out of whack */ - return -EFAULT; -} - -static int ioat_dca_remove_requester(struct dca_provider *dca, - struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == pdev) { - writew(0, ioatdca->dca_base + (i * 4)); - ioatdca->req_slots[i].pdev = NULL; - ioatdca->req_slots[i].rid = 0; - ioatdca->requester_count--; - return i; - } - } - return -ENODEV; -} - -static u8 ioat_dca_get_tag(struct dca_provider *dca, - struct device *dev, - int cpu) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - int i, apic_id, bit, value; - u8 entry, tag; - - tag = 0; - apic_id = cpu_physical_id(cpu); - - for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { - entry = ioatdca->tag_map[i]; - if (entry & DCA_TAG_MAP_VALID) { - bit = entry & ~DCA_TAG_MAP_VALID; - value = (apic_id & (1 << bit)) ? 1 : 0; - } else { - value = entry ? 1 : 0; - } - tag |= (value << i); - } - return tag; -} - static int ioat_dca_dev_managed(struct dca_provider *dca, struct device *dev) { @@ -231,260 +132,7 @@ static int ioat_dca_dev_managed(struct dca_provider *dca, return 0; } -static struct dca_ops ioat_dca_ops = { - .add_requester = ioat_dca_add_requester, - .remove_requester = ioat_dca_remove_requester, - .get_tag = ioat_dca_get_tag, - .dev_managed = ioat_dca_dev_managed, -}; - - -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) -{ - struct dca_provider *dca; - struct ioat_dca_priv *ioatdca; - u8 *tag_map = NULL; - int i; - int err; - u8 version; - u8 max_requesters; - - if (!system_has_dca_enabled(pdev)) - return NULL; - - /* I/OAT v1 systems must have a known tag_map to support DCA */ - switch (pdev->vendor) { - case PCI_VENDOR_ID_INTEL: - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT: - tag_map = ioat_tag_map_BNB; - break; - case PCI_DEVICE_ID_INTEL_IOAT_CNB: - tag_map = ioat_tag_map_CNB; - break; - case PCI_DEVICE_ID_INTEL_IOAT_SCNB: - tag_map = ioat_tag_map_SCNB; - break; - } - break; - case PCI_VENDOR_ID_UNISYS: - switch (pdev->device) { - case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR: - tag_map = ioat_tag_map_UNISYS; - break; - } - break; - } - if (tag_map == NULL) - return NULL; - - version = readb(iobase + IOAT_VER_OFFSET); - if (version == IOAT_VER_3_0) - max_requesters = IOAT3_DCA_MAX_REQ; - else - max_requesters = IOAT_DCA_MAX_REQ; - - dca = alloc_dca_provider(&ioat_dca_ops, - sizeof(*ioatdca) + - (sizeof(struct ioat_dca_slot) * max_requesters)); - if (!dca) - return NULL; - - ioatdca = dca_priv(dca); - ioatdca->max_requesters = max_requesters; - ioatdca->dca_base = iobase + 0x54; - - /* copy over the APIC ID to DCA tag mapping */ - for (i = 0; i < IOAT_TAG_MAP_LEN; i++) - ioatdca->tag_map[i] = tag_map[i]; - - err = register_dca_provider(dca, &pdev->dev); - if (err) { - free_dca_provider(dca); - return NULL; - } - - return dca; -} - - -static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 id; - u16 global_req_table; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - id = dcaid_from_pcidev(pdev); - - if (ioatdca->requester_count == ioatdca->max_requesters) - return -ENODEV; - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == NULL) { - /* found an empty slot */ - ioatdca->requester_count++; - ioatdca->req_slots[i].pdev = pdev; - ioatdca->req_slots[i].rid = id; - global_req_table = - readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); - writel(id | IOAT_DCA_GREQID_VALID, - ioatdca->iobase + global_req_table + (i * 4)); - return i; - } - } - /* Error, ioatdma->requester_count is out of whack */ - return -EFAULT; -} - -static int ioat2_dca_remove_requester(struct dca_provider *dca, - struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 global_req_table; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == pdev) { - global_req_table = - readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); - writel(0, ioatdca->iobase + global_req_table + (i * 4)); - ioatdca->req_slots[i].pdev = NULL; - ioatdca->req_slots[i].rid = 0; - ioatdca->requester_count--; - return i; - } - } - return -ENODEV; -} - -static u8 ioat2_dca_get_tag(struct dca_provider *dca, - struct device *dev, - int cpu) -{ - u8 tag; - - tag = ioat_dca_get_tag(dca, dev, cpu); - tag = (~tag) & 0x1F; - return tag; -} - -static struct dca_ops ioat2_dca_ops = { - .add_requester = ioat2_dca_add_requester, - .remove_requester = ioat2_dca_remove_requester, - .get_tag = ioat2_dca_get_tag, - .dev_managed = ioat_dca_dev_managed, -}; - -static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) -{ - int slots = 0; - u32 req; - u16 global_req_table; - - global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET); - if (global_req_table == 0) - return 0; - do { - req = readl(iobase + global_req_table + (slots * sizeof(u32))); - slots++; - } while ((req & IOAT_DCA_GREQID_LASTID) == 0); - - return slots; -} - -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) -{ - struct dca_provider *dca; - struct ioat_dca_priv *ioatdca; - int slots; - int i; - int err; - u32 tag_map; - u16 dca_offset; - u16 csi_fsb_control; - u16 pcie_control; - u8 bit; - - if (!system_has_dca_enabled(pdev)) - return NULL; - - dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); - if (dca_offset == 0) - return NULL; - - slots = ioat2_dca_count_dca_slots(iobase, dca_offset); - if (slots == 0) - return NULL; - - dca = alloc_dca_provider(&ioat2_dca_ops, - sizeof(*ioatdca) - + (sizeof(struct ioat_dca_slot) * slots)); - if (!dca) - return NULL; - - ioatdca = dca_priv(dca); - ioatdca->iobase = iobase; - ioatdca->dca_base = iobase + dca_offset; - ioatdca->max_requesters = slots; - - /* some bios might not know to turn these on */ - csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); - if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) { - csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH; - writew(csi_fsb_control, - ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); - } - pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); - if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) { - pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR; - writew(pcie_control, - ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); - } - - - /* TODO version, compatibility and configuration checks */ - - /* copy out the APIC to DCA tag map */ - tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET); - for (i = 0; i < 5; i++) { - bit = (tag_map >> (4 * i)) & 0x0f; - if (bit < 8) - ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID; - else - ioatdca->tag_map[i] = 0; - } - - if (!dca2_tag_map_valid(ioatdca->tag_map)) { - WARN_TAINT_ONCE(1, TAINT_FIRMWARE_WORKAROUND, - "%s %s: APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n", - dev_driver_string(&pdev->dev), - dev_name(&pdev->dev)); - free_dca_provider(dca); - return NULL; - } - - err = register_dca_provider(dca, &pdev->dev); - if (err) { - free_dca_provider(dca); - return NULL; - } - - return dca; -} - -static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) +static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev) { struct ioat_dca_priv *ioatdca = dca_priv(dca); struct pci_dev *pdev; @@ -518,7 +166,7 @@ static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) return -EFAULT; } -static int ioat3_dca_remove_requester(struct dca_provider *dca, +static int ioat_dca_remove_requester(struct dca_provider *dca, struct device *dev) { struct ioat_dca_priv *ioatdca = dca_priv(dca); @@ -545,7 +193,7 @@ static int ioat3_dca_remove_requester(struct dca_provider *dca, return -ENODEV; } -static u8 ioat3_dca_get_tag(struct dca_provider *dca, +static u8 ioat_dca_get_tag(struct dca_provider *dca, struct device *dev, int cpu) { @@ -576,14 +224,14 @@ static u8 ioat3_dca_get_tag(struct dca_provider *dca, return tag; } -static struct dca_ops ioat3_dca_ops = { - .add_requester = ioat3_dca_add_requester, - .remove_requester = ioat3_dca_remove_requester, - .get_tag = ioat3_dca_get_tag, +static struct dca_ops ioat_dca_ops = { + .add_requester = ioat_dca_add_requester, + .remove_requester = ioat_dca_remove_requester, + .get_tag = ioat_dca_get_tag, .dev_managed = ioat_dca_dev_managed, }; -static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) +static int ioat_dca_count_dca_slots(void *iobase, u16 dca_offset) { int slots = 0; u32 req; @@ -618,7 +266,7 @@ static inline int dca3_tag_map_invalid(u8 *tag_map) (tag_map[4] == DCA_TAG_MAP_VALID)); } -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) +struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) { struct dca_provider *dca; struct ioat_dca_priv *ioatdca; @@ -645,11 +293,11 @@ struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) if (dca_offset == 0) return NULL; - slots = ioat3_dca_count_dca_slots(iobase, dca_offset); + slots = ioat_dca_count_dca_slots(iobase, dca_offset); if (slots == 0) return NULL; - dca = alloc_dca_provider(&ioat3_dca_ops, + dca = alloc_dca_provider(&ioat_dca_ops, sizeof(*ioatdca) + (sizeof(struct ioat_dca_slot) * slots)); if (!dca) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index ee0aa9f4ccfa..f66b7e640610 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -1,6 +1,6 @@ /* * Intel I/OAT DMA Linux driver - * Copyright(c) 2004 - 2009 Intel Corporation. + * Copyright(c) 2004 - 2015 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -31,31 +31,23 @@ #include <linux/dma-mapping.h> #include <linux/workqueue.h> #include <linux/prefetch.h> -#include <linux/i7300_idle.h> #include "dma.h" #include "registers.h" #include "hw.h" #include "../dmaengine.h" -int ioat_pending_level = 4; -module_param(ioat_pending_level, int, 0644); -MODULE_PARM_DESC(ioat_pending_level, - "high-water mark for pushing ioat descriptors (default: 4)"); - -/* internal functions */ -static void ioat1_cleanup(struct ioat_dma_chan *ioat); -static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat); +static void ioat_eh(struct ioatdma_chan *ioat_chan); /** * ioat_dma_do_interrupt - handler used for single vector interrupt mode * @irq: interrupt id * @data: interrupt data */ -static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) +irqreturn_t ioat_dma_do_interrupt(int irq, void *data) { struct ioatdma_device *instance = data; - struct ioat_chan_common *chan; + struct ioatdma_chan *ioat_chan; unsigned long attnstatus; int bit; u8 intrctrl; @@ -72,9 +64,9 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) { - chan = ioat_chan_by_index(instance, bit); - if (test_bit(IOAT_RUN, &chan->state)) - tasklet_schedule(&chan->cleanup_task); + ioat_chan = ioat_chan_by_index(instance, bit); + if (test_bit(IOAT_RUN, &ioat_chan->state)) + tasklet_schedule(&ioat_chan->cleanup_task); } writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); @@ -86,1161 +78,912 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) * @irq: interrupt id * @data: interrupt data */ -static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) +irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) { - struct ioat_chan_common *chan = data; + struct ioatdma_chan *ioat_chan = data; - if (test_bit(IOAT_RUN, &chan->state)) - tasklet_schedule(&chan->cleanup_task); + if (test_bit(IOAT_RUN, &ioat_chan->state)) + tasklet_schedule(&ioat_chan->cleanup_task); return IRQ_HANDLED; } -/* common channel initialization */ -void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *chan, int idx) +void ioat_stop(struct ioatdma_chan *ioat_chan) { - struct dma_device *dma = &device->common; - struct dma_chan *c = &chan->common; - unsigned long data = (unsigned long) c; - - chan->device = device; - chan->reg_base = device->reg_base + (0x80 * (idx + 1)); - spin_lock_init(&chan->cleanup_lock); - chan->common.device = dma; - dma_cookie_init(&chan->common); - list_add_tail(&chan->common.device_node, &dma->channels); - device->idx[idx] = chan; - init_timer(&chan->timer); - chan->timer.function = device->timer_fn; - chan->timer.data = data; - tasklet_init(&chan->cleanup_task, device->cleanup_fn, data); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct pci_dev *pdev = ioat_dma->pdev; + int chan_id = chan_num(ioat_chan); + struct msix_entry *msix; + + /* 1/ stop irq from firing tasklets + * 2/ stop the tasklet from re-arming irqs + */ + clear_bit(IOAT_RUN, &ioat_chan->state); + + /* flush inflight interrupts */ + switch (ioat_dma->irq_mode) { + case IOAT_MSIX: + msix = &ioat_dma->msix_entries[chan_id]; + synchronize_irq(msix->vector); + break; + case IOAT_MSI: + case IOAT_INTX: + synchronize_irq(pdev->irq); + break; + default: + break; + } + + /* flush inflight timers */ + del_timer_sync(&ioat_chan->timer); + + /* flush inflight tasklet runs */ + tasklet_kill(&ioat_chan->cleanup_task); + + /* final cleanup now that everything is quiesced and can't re-arm */ + ioat_cleanup_event((unsigned long)&ioat_chan->dma_chan); } -/** - * ioat1_dma_enumerate_channels - find and initialize the device's channels - * @device: the device to be enumerated - */ -static int ioat1_enumerate_channels(struct ioatdma_device *device) +static void __ioat_issue_pending(struct ioatdma_chan *ioat_chan) { - u8 xfercap_scale; - u32 xfercap; - int i; - struct ioat_dma_chan *ioat; - struct device *dev = &device->pdev->dev; - struct dma_device *dma = &device->common; - - INIT_LIST_HEAD(&dma->channels); - dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); - dma->chancnt &= 0x1f; /* bits [4:0] valid */ - if (dma->chancnt > ARRAY_SIZE(device->idx)) { - dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", - dma->chancnt, ARRAY_SIZE(device->idx)); - dma->chancnt = ARRAY_SIZE(device->idx); - } - xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); - xfercap_scale &= 0x1f; /* bits [4:0] valid */ - xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); - dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap); - -#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) - dma->chancnt--; -#endif - for (i = 0; i < dma->chancnt; i++) { - ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); - if (!ioat) - break; + ioat_chan->dmacount += ioat_ring_pending(ioat_chan); + ioat_chan->issued = ioat_chan->head; + writew(ioat_chan->dmacount, + ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, + ioat_chan->issued, ioat_chan->dmacount); +} + +void ioat_issue_pending(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); - ioat_init_channel(device, &ioat->base, i); - ioat->xfercap = xfercap; - spin_lock_init(&ioat->desc_lock); - INIT_LIST_HEAD(&ioat->free_desc); - INIT_LIST_HEAD(&ioat->used_desc); + if (ioat_ring_pending(ioat_chan)) { + spin_lock_bh(&ioat_chan->prep_lock); + __ioat_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); } - dma->chancnt = i; - return i; } /** - * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended - * descriptors to hw - * @chan: DMA channel handle + * ioat_update_pending - log pending descriptors + * @ioat: ioat+ channel + * + * Check if the number of unsubmitted descriptors has exceeded the + * watermark. Called with prep_lock held */ -static inline void -__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat) +static void ioat_update_pending(struct ioatdma_chan *ioat_chan) { - void __iomem *reg_base = ioat->base.reg_base; - - dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n", - __func__, ioat->pending); - ioat->pending = 0; - writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET); + if (ioat_ring_pending(ioat_chan) > ioat_pending_level) + __ioat_issue_pending(ioat_chan); } -static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) +static void __ioat_start_null_desc(struct ioatdma_chan *ioat_chan) { - struct ioat_dma_chan *ioat = to_ioat_chan(chan); + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; - if (ioat->pending > 0) { - spin_lock_bh(&ioat->desc_lock); - __ioat1_dma_memcpy_issue_pending(ioat); - spin_unlock_bh(&ioat->desc_lock); + if (ioat_ring_space(ioat_chan) < 1) { + dev_err(to_dev(ioat_chan), + "Unable to start null desc - ring full\n"); + return; } + + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued); + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head); + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.compl_write = 1; + /* set size to non-zero value (channel returns error when size is 0) */ + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + async_tx_ack(&desc->txd); + ioat_set_chainaddr(ioat_chan, desc->txd.phys); + dump_desc_dbg(ioat_chan, desc); + /* make sure descriptors are written before we submit */ + wmb(); + ioat_chan->head += 1; + __ioat_issue_pending(ioat_chan); } -/** - * ioat1_reset_channel - restart a channel - * @ioat: IOAT DMA channel handle - */ -static void ioat1_reset_channel(struct ioat_dma_chan *ioat) +void ioat_start_null_desc(struct ioatdma_chan *ioat_chan) { - struct ioat_chan_common *chan = &ioat->base; - void __iomem *reg_base = chan->reg_base; - u32 chansts, chanerr; - - dev_warn(to_dev(chan), "reset\n"); - chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); - chansts = *chan->completion & IOAT_CHANSTS_STATUS; - if (chanerr) { - dev_err(to_dev(chan), - "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", - chan_num(chan), chansts, chanerr); - writel(chanerr, reg_base + IOAT_CHANERR_OFFSET); + spin_lock_bh(&ioat_chan->prep_lock); + __ioat_start_null_desc(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); +} + +static void __ioat_restart_chan(struct ioatdma_chan *ioat_chan) +{ + /* set the tail to be re-issued */ + ioat_chan->issued = ioat_chan->tail; + ioat_chan->dmacount = 0; + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, + ioat_chan->issued, ioat_chan->dmacount); + + if (ioat_ring_pending(ioat_chan)) { + struct ioat_ring_ent *desc; + + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail); + ioat_set_chainaddr(ioat_chan, desc->txd.phys); + __ioat_issue_pending(ioat_chan); + } else + __ioat_start_null_desc(ioat_chan); +} + +static int ioat_quiesce(struct ioatdma_chan *ioat_chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + u32 status; + + status = ioat_chansts(ioat_chan); + if (is_ioat_active(status) || is_ioat_idle(status)) + ioat_suspend(ioat_chan); + while (is_ioat_active(status) || is_ioat_idle(status)) { + if (tmo && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + status = ioat_chansts(ioat_chan); + cpu_relax(); } - /* - * whack it upside the head with a reset - * and wait for things to settle out. - * force the pending count to a really big negative - * to make sure no one forces an issue_pending - * while we're waiting. - */ + return err; +} - ioat->pending = INT_MIN; - writeb(IOAT_CHANCMD_RESET, - reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - set_bit(IOAT_RESET_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + RESET_DELAY); +static int ioat_reset_sync(struct ioatdma_chan *ioat_chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + + ioat_reset(ioat_chan); + while (ioat_reset_pending(ioat_chan)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + cpu_relax(); + } + + return err; } -static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) +static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx) + __releases(&ioat_chan->prep_lock) { struct dma_chan *c = tx->chan; - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); - struct ioat_chan_common *chan = &ioat->base; - struct ioat_desc_sw *first; - struct ioat_desc_sw *chain_tail; + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); dma_cookie_t cookie; - spin_lock_bh(&ioat->desc_lock); - /* cookie incr and addition to used_list must be atomic */ cookie = dma_cookie_assign(tx); - dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); + dev_dbg(to_dev(ioat_chan), "%s: cookie: %d\n", __func__, cookie); + + if (!test_and_set_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state)) + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); - /* write address into NextDescriptor field of last desc in chain */ - first = to_ioat_desc(desc->tx_list.next); - chain_tail = to_ioat_desc(ioat->used_desc.prev); - /* make descriptor updates globally visible before chaining */ + /* make descriptor updates visible before advancing ioat->head, + * this is purposefully not smp_wmb() since we are also + * publishing the descriptor updates to a dma device + */ wmb(); - chain_tail->hw->next = first->txd.phys; - list_splice_tail_init(&desc->tx_list, &ioat->used_desc); - dump_desc_dbg(ioat, chain_tail); - dump_desc_dbg(ioat, first); - if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + ioat_chan->head += ioat_chan->produce; - ioat->active += desc->hw->tx_cnt; - ioat->pending += desc->hw->tx_cnt; - if (ioat->pending >= ioat_pending_level) - __ioat1_dma_memcpy_issue_pending(ioat); - spin_unlock_bh(&ioat->desc_lock); + ioat_update_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); return cookie; } -/** - * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair - * @ioat: the channel supplying the memory pool for the descriptors - * @flags: allocation flags - */ -static struct ioat_desc_sw * -ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) +static struct ioat_ring_ent * +ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) { - struct ioat_dma_descriptor *desc; - struct ioat_desc_sw *desc_sw; - struct ioatdma_device *ioatdma_device; + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + struct ioatdma_device *ioat_dma; dma_addr_t phys; - ioatdma_device = ioat->base.device; - desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); - if (unlikely(!desc)) + ioat_dma = to_ioatdma_device(chan->device); + hw = pci_pool_alloc(ioat_dma->dma_pool, flags, &phys); + if (!hw) return NULL; + memset(hw, 0, sizeof(*hw)); - desc_sw = kzalloc(sizeof(*desc_sw), flags); - if (unlikely(!desc_sw)) { - pci_pool_free(ioatdma_device->dma_pool, desc, phys); + desc = kmem_cache_zalloc(ioat_cache, flags); + if (!desc) { + pci_pool_free(ioat_dma->dma_pool, hw, phys); return NULL; } - memset(desc, 0, sizeof(*desc)); + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = ioat_tx_submit_unlock; + desc->hw = hw; + desc->txd.phys = phys; + return desc; +} - INIT_LIST_HEAD(&desc_sw->tx_list); - dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common); - desc_sw->txd.tx_submit = ioat1_tx_submit; - desc_sw->hw = desc; - desc_sw->txd.phys = phys; - set_desc_id(desc_sw, -1); +void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) +{ + struct ioatdma_device *ioat_dma; - return desc_sw; + ioat_dma = to_ioatdma_device(chan->device); + pci_pool_free(ioat_dma->dma_pool, desc->hw, desc->txd.phys); + kmem_cache_free(ioat_cache, desc); } -static int ioat_initial_desc_count = 256; -module_param(ioat_initial_desc_count, int, 0644); -MODULE_PARM_DESC(ioat_initial_desc_count, - "ioat1: initial descriptors per channel (default: 256)"); -/** - * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors - * @chan: the channel to be filled out - */ -static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) +struct ioat_ring_ent ** +ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags) { - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioat_desc_sw *desc; - u32 chanerr; + struct ioat_ring_ent **ring; + int descs = 1 << order; int i; - LIST_HEAD(tmp_list); - - /* have we already been set up? */ - if (!list_empty(&ioat->free_desc)) - return ioat->desccount; - /* Setup register to interrupt and write completion status on error */ - writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); + if (order > ioat_get_max_alloc_order()) + return NULL; - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - if (chanerr) { - dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + /* allocate the array to hold the software ring */ + ring = kcalloc(descs, sizeof(*ring), flags); + if (!ring) + return NULL; + for (i = 0; i < descs; i++) { + ring[i] = ioat_alloc_ring_ent(c, flags); + if (!ring[i]) { + while (i--) + ioat_free_ring_ent(ring[i], c); + kfree(ring); + return NULL; + } + set_desc_id(ring[i], i); } - /* Allocate descriptors */ - for (i = 0; i < ioat_initial_desc_count; i++) { - desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL); - if (!desc) { - dev_err(to_dev(chan), "Only %d initial descriptors\n", i); - break; - } - set_desc_id(desc, i); - list_add_tail(&desc->node, &tmp_list); + /* link descs */ + for (i = 0; i < descs-1; i++) { + struct ioat_ring_ent *next = ring[i+1]; + struct ioat_dma_descriptor *hw = ring[i]->hw; + + hw->next = next->txd.phys; } - spin_lock_bh(&ioat->desc_lock); - ioat->desccount = i; - list_splice(&tmp_list, &ioat->free_desc); - spin_unlock_bh(&ioat->desc_lock); - - /* allocate a completion writeback area */ - /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ - chan->completion = pci_pool_alloc(chan->device->completion_pool, - GFP_KERNEL, &chan->completion_dma); - memset(chan->completion, 0, sizeof(*chan->completion)); - writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - writel(((u64) chan->completion_dma) >> 32, - chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - - set_bit(IOAT_RUN, &chan->state); - ioat1_dma_start_null_desc(ioat); /* give chain to dma device */ - dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", - __func__, ioat->desccount); - return ioat->desccount; + ring[i]->hw->next = ring[0]->txd.phys; + + return ring; } -void ioat_stop(struct ioat_chan_common *chan) +static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order) { - struct ioatdma_device *device = chan->device; - struct pci_dev *pdev = device->pdev; - int chan_id = chan_num(chan); - struct msix_entry *msix; + /* reshape differs from normal ring allocation in that we want + * to allocate a new software ring while only + * extending/truncating the hardware ring + */ + struct dma_chan *c = &ioat_chan->dma_chan; + const u32 curr_size = ioat_ring_size(ioat_chan); + const u16 active = ioat_ring_active(ioat_chan); + const u32 new_size = 1 << order; + struct ioat_ring_ent **ring; + u32 i; + + if (order > ioat_get_max_alloc_order()) + return false; - /* 1/ stop irq from firing tasklets - * 2/ stop the tasklet from re-arming irqs + /* double check that we have at least 1 free descriptor */ + if (active == curr_size) + return false; + + /* when shrinking, verify that we can hold the current active + * set in the new ring */ - clear_bit(IOAT_RUN, &chan->state); + if (active >= new_size) + return false; - /* flush inflight interrupts */ - switch (device->irq_mode) { - case IOAT_MSIX: - msix = &device->msix_entries[chan_id]; - synchronize_irq(msix->vector); - break; - case IOAT_MSI: - case IOAT_INTX: - synchronize_irq(pdev->irq); - break; - default: - break; - } + /* allocate the array to hold the software ring */ + ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT); + if (!ring) + return false; - /* flush inflight timers */ - del_timer_sync(&chan->timer); + /* allocate/trim descriptors as needed */ + if (new_size > curr_size) { + /* copy current descriptors to the new ring */ + for (i = 0; i < curr_size; i++) { + u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1); + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); - /* flush inflight tasklet runs */ - tasklet_kill(&chan->cleanup_task); + ring[new_idx] = ioat_chan->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } - /* final cleanup now that everything is quiesced and can't re-arm */ - device->cleanup_fn((unsigned long) &chan->common); -} + /* add new descriptors to the ring */ + for (i = curr_size; i < new_size; i++) { + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); -/** - * ioat1_dma_free_chan_resources - release all the descriptors - * @chan: the channel to be cleaned - */ -static void ioat1_dma_free_chan_resources(struct dma_chan *c) -{ - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *ioatdma_device = chan->device; - struct ioat_desc_sw *desc, *_desc; - int in_use_descs = 0; - - /* Before freeing channel resources first check - * if they have been previously allocated for this channel. - */ - if (ioat->desccount == 0) - return; + ring[new_idx] = ioat_alloc_ring_ent(c, GFP_NOWAIT); + if (!ring[new_idx]) { + while (i--) { + u16 new_idx = (ioat_chan->tail+i) & + (new_size-1); + + ioat_free_ring_ent(ring[new_idx], c); + } + kfree(ring); + return false; + } + set_desc_id(ring[new_idx], new_idx); + } - ioat_stop(chan); + /* hw link new descriptors */ + for (i = curr_size-1; i < new_size; i++) { + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); + struct ioat_ring_ent *next = + ring[(new_idx+1) & (new_size-1)]; + struct ioat_dma_descriptor *hw = ring[new_idx]->hw; - /* Delay 100ms after reset to allow internal DMA logic to quiesce - * before removing DMA descriptor resources. - */ - writeb(IOAT_CHANCMD_RESET, - chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - mdelay(100); - - spin_lock_bh(&ioat->desc_lock); - list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { - dev_dbg(to_dev(chan), "%s: freeing %d from used list\n", - __func__, desc_id(desc)); - dump_desc_dbg(ioat, desc); - in_use_descs++; - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->txd.phys); - kfree(desc); - } - list_for_each_entry_safe(desc, _desc, - &ioat->free_desc, node) { - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->txd.phys); - kfree(desc); + hw->next = next->txd.phys; + } + } else { + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *next; + + /* copy current descriptors to the new ring, dropping the + * removed descriptors + */ + for (i = 0; i < new_size; i++) { + u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1); + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); + + ring[new_idx] = ioat_chan->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } + + /* free deleted descriptors */ + for (i = new_size; i < curr_size; i++) { + struct ioat_ring_ent *ent; + + ent = ioat_get_ring_ent(ioat_chan, ioat_chan->tail+i); + ioat_free_ring_ent(ent, c); + } + + /* fix up hardware ring */ + hw = ring[(ioat_chan->tail+new_size-1) & (new_size-1)]->hw; + next = ring[(ioat_chan->tail+new_size) & (new_size-1)]; + hw->next = next->txd.phys; } - spin_unlock_bh(&ioat->desc_lock); - pci_pool_free(ioatdma_device->completion_pool, - chan->completion, - chan->completion_dma); + dev_dbg(to_dev(ioat_chan), "%s: allocated %d descriptors\n", + __func__, new_size); - /* one is ok since we left it on there on purpose */ - if (in_use_descs > 1) - dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", - in_use_descs - 1); + kfree(ioat_chan->ring); + ioat_chan->ring = ring; + ioat_chan->alloc_order = order; - chan->last_completion = 0; - chan->completion_dma = 0; - ioat->pending = 0; - ioat->desccount = 0; + return true; } /** - * ioat1_dma_get_next_descriptor - return the next available descriptor - * @ioat: IOAT DMA channel handle - * - * Gets the next descriptor from the chain, and must be called with the - * channel's desc_lock held. Allocates more descriptors if the channel - * has run out. + * ioat_check_space_lock - verify space and grab ring producer lock + * @ioat: ioat,3 channel (ring) to operate on + * @num_descs: allocation length */ -static struct ioat_desc_sw * -ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat) +int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs) + __acquires(&ioat_chan->prep_lock) { - struct ioat_desc_sw *new; + bool retry; - if (!list_empty(&ioat->free_desc)) { - new = to_ioat_desc(ioat->free_desc.next); - list_del(&new->node); - } else { - /* try to get another desc */ - new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC); - if (!new) { - dev_err(to_dev(&ioat->base), "alloc failed\n"); - return NULL; - } + retry: + spin_lock_bh(&ioat_chan->prep_lock); + /* never allow the last descriptor to be consumed, we need at + * least one free at all times to allow for on-the-fly ring + * resizing. + */ + if (likely(ioat_ring_space(ioat_chan) > num_descs)) { + dev_dbg(to_dev(ioat_chan), "%s: num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat_chan->head, + ioat_chan->tail, ioat_chan->issued); + ioat_chan->produce = num_descs; + return 0; /* with ioat->prep_lock held */ + } + retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + + /* is another cpu already trying to expand the ring? */ + if (retry) + goto retry; + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->prep_lock); + retry = reshape_ring(ioat_chan, ioat_chan->alloc_order + 1); + clear_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + + /* if we were able to expand the ring retry the allocation */ + if (retry) + goto retry; + + dev_dbg_ratelimited(to_dev(ioat_chan), + "%s: ring full! num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat_chan->head, + ioat_chan->tail, ioat_chan->issued); + + /* progress reclaim in the allocation failure case we may be + * called under bh_disabled so we need to trigger the timer + * event directly + */ + if (time_is_before_jiffies(ioat_chan->timer.expires) + && timer_pending(&ioat_chan->timer)) { + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + ioat_timer_event((unsigned long)ioat_chan); } - dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n", - __func__, desc_id(new)); - prefetch(new->hw); - return new; + + return -ENOMEM; } -static struct dma_async_tx_descriptor * -ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags) +static bool desc_has_ext(struct ioat_ring_ent *desc) { - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_desc_sw *desc; - size_t copy; - LIST_HEAD(chain); - dma_addr_t src = dma_src; - dma_addr_t dest = dma_dest; - size_t total_len = len; - struct ioat_dma_descriptor *hw = NULL; - int tx_cnt = 0; - - spin_lock_bh(&ioat->desc_lock); - desc = ioat1_dma_get_next_descriptor(ioat); - do { - if (!desc) - break; - - tx_cnt++; - copy = min_t(size_t, len, ioat->xfercap); + struct ioat_dma_descriptor *hw = desc->hw; - hw = desc->hw; - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dest; + if (hw->ctl_f.op == IOAT_OP_XOR || + hw->ctl_f.op == IOAT_OP_XOR_VAL) { + struct ioat_xor_descriptor *xor = desc->xor; - list_add_tail(&desc->node, &chain); + if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) + return true; + } else if (hw->ctl_f.op == IOAT_OP_PQ || + hw->ctl_f.op == IOAT_OP_PQ_VAL) { + struct ioat_pq_descriptor *pq = desc->pq; - len -= copy; - dest += copy; - src += copy; - if (len) { - struct ioat_desc_sw *next; - - async_tx_ack(&desc->txd); - next = ioat1_dma_get_next_descriptor(ioat); - hw->next = next ? next->txd.phys : 0; - dump_desc_dbg(ioat, desc); - desc = next; - } else - hw->next = 0; - } while (len); - - if (!desc) { - struct ioat_chan_common *chan = &ioat->base; - - dev_err(to_dev(chan), - "chan%d - get_next_desc failed\n", chan_num(chan)); - list_splice(&chain, &ioat->free_desc); - spin_unlock_bh(&ioat->desc_lock); - return NULL; + if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) + return true; } - spin_unlock_bh(&ioat->desc_lock); - desc->txd.flags = flags; - desc->len = total_len; - list_splice(&chain, &desc->tx_list); - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->tx_cnt = tx_cnt; - dump_desc_dbg(ioat, desc); - - return &desc->txd; + return false; } -static void ioat1_cleanup_event(unsigned long data) +static void +ioat_free_sed(struct ioatdma_device *ioat_dma, struct ioat_sed_ent *sed) { - struct ioat_dma_chan *ioat = to_ioat_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - - ioat1_cleanup(ioat); - if (!test_bit(IOAT_RUN, &chan->state)) + if (!sed) return; - writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); + + dma_pool_free(ioat_dma->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); + kmem_cache_free(ioat_sed_cache, sed); } -dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan) +static u64 ioat_get_current_completion(struct ioatdma_chan *ioat_chan) { - dma_addr_t phys_complete; + u64 phys_complete; u64 completion; - completion = *chan->completion; + completion = *ioat_chan->completion; phys_complete = ioat_chansts_to_addr(completion); - dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, + dev_dbg(to_dev(ioat_chan), "%s: phys_complete: %#llx\n", __func__, (unsigned long long) phys_complete); - if (is_ioat_halted(completion)) { - u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - dev_err(to_dev(chan), "Channel halted, chanerr = %x\n", - chanerr); - - /* TODO do something to salvage the situation */ - } - return phys_complete; } -bool ioat_cleanup_preamble(struct ioat_chan_common *chan, - dma_addr_t *phys_complete) +static bool ioat_cleanup_preamble(struct ioatdma_chan *ioat_chan, + u64 *phys_complete) { - *phys_complete = ioat_get_current_completion(chan); - if (*phys_complete == chan->last_completion) + *phys_complete = ioat_get_current_completion(ioat_chan); + if (*phys_complete == ioat_chan->last_completion) return false; - clear_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + clear_bit(IOAT_COMPLETION_ACK, &ioat_chan->state); + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); return true; } -static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete) +static void +desc_get_errstat(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc) { - struct ioat_chan_common *chan = &ioat->base; - struct list_head *_desc, *n; - struct dma_async_tx_descriptor *tx; + struct ioat_dma_descriptor *hw = desc->hw; - dev_dbg(to_dev(chan), "%s: phys_complete: %llx\n", - __func__, (unsigned long long) phys_complete); - list_for_each_safe(_desc, n, &ioat->used_desc) { - struct ioat_desc_sw *desc; + switch (hw->ctl_f.op) { + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: + { + struct ioat_pq_descriptor *pq = desc->pq; - prefetch(n); - desc = list_entry(_desc, typeof(*desc), node); - tx = &desc->txd; - /* - * Incoming DMA requests may use multiple descriptors, - * due to exceeding xfercap, perhaps. If so, only the - * last one will have a cookie, and require unmapping. - */ - dump_desc_dbg(ioat, desc); - if (tx->cookie) { - dma_cookie_complete(tx); - dma_descriptor_unmap(tx); - ioat->active -= desc->hw->tx_cnt; - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } + /* check if there's error written */ + if (!pq->dwbes_f.wbes) + return; - if (tx->phys != phys_complete) { - /* - * a completed entry, but not the last, so clean - * up if the client is done with the descriptor - */ - if (async_tx_test_ack(tx)) - list_move_tail(&desc->node, &ioat->free_desc); - } else { - /* - * last used desc. Do not remove, so we can - * append from it. - */ - - /* if nothing else is pending, cancel the - * completion timeout - */ - if (n == &ioat->used_desc) { - dev_dbg(to_dev(chan), - "%s cancel completion timeout\n", - __func__); - clear_bit(IOAT_COMPLETION_PENDING, &chan->state); - } + /* need to set a chanerr var for checking to clear later */ - /* TODO check status bits? */ - break; - } - } + if (pq->dwbes_f.p_val_err) + *desc->result |= SUM_CHECK_P_RESULT; + + if (pq->dwbes_f.q_val_err) + *desc->result |= SUM_CHECK_Q_RESULT; - chan->last_completion = phys_complete; + return; + } + default: + return; + } } /** - * ioat1_cleanup - cleanup up finished descriptors - * @chan: ioat channel to be cleaned up - * - * To prevent lock contention we defer cleanup when the locks are - * contended with a terminal timeout that forces cleanup and catches - * completion notification errors. + * __cleanup - reclaim used descriptors + * @ioat: channel (ring) to clean */ -static void ioat1_cleanup(struct ioat_dma_chan *ioat) +static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) { - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - - prefetch(chan->completion); - - if (!spin_trylock_bh(&chan->cleanup_lock)) - return; + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + bool seen_current = false; + int idx = ioat_chan->tail, i; + u16 active; - if (!ioat_cleanup_preamble(chan, &phys_complete)) { - spin_unlock_bh(&chan->cleanup_lock); - return; - } + dev_dbg(to_dev(ioat_chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued); - if (!spin_trylock_bh(&ioat->desc_lock)) { - spin_unlock_bh(&chan->cleanup_lock); + /* + * At restart of the channel, the completion address and the + * channel status will be 0 due to starting a new chain. Since + * it's new chain and the first descriptor "fails", there is + * nothing to clean up. We do not want to reap the entire submitted + * chain due to this 0 address value and then BUG. + */ + if (!phys_complete) return; - } - __cleanup(ioat, phys_complete); + active = ioat_ring_active(ioat_chan); + for (i = 0; i < active && !seen_current; i++) { + struct dma_async_tx_descriptor *tx; - spin_unlock_bh(&ioat->desc_lock); - spin_unlock_bh(&chan->cleanup_lock); -} - -static void ioat1_timer_event(unsigned long data) -{ - struct ioat_dma_chan *ioat = to_ioat_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; + smp_read_barrier_depends(); + prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1)); + desc = ioat_get_ring_ent(ioat_chan, idx + i); + dump_desc_dbg(ioat_chan, desc); - dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state); + /* set err stat if we are using dwbes */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + desc_get_errstat(ioat_chan, desc); - spin_lock_bh(&chan->cleanup_lock); - if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) { - struct ioat_desc_sw *desc; - - spin_lock_bh(&ioat->desc_lock); + tx = &desc->txd; + if (tx->cookie) { + dma_cookie_complete(tx); + dma_descriptor_unmap(tx); + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } - /* restart active descriptors */ - desc = to_ioat_desc(ioat->used_desc.prev); - ioat_set_chainaddr(ioat, desc->txd.phys); - ioat_start(chan); + if (tx->phys == phys_complete) + seen_current = true; - ioat->pending = 0; - set_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - spin_unlock_bh(&ioat->desc_lock); - } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { - dma_addr_t phys_complete; + /* skip extended descriptors */ + if (desc_has_ext(desc)) { + BUG_ON(i + 1 >= active); + i++; + } - spin_lock_bh(&ioat->desc_lock); - /* if we haven't made progress and we have already - * acknowledged a pending completion once, then be more - * forceful with a restart - */ - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) - ioat1_reset_channel(ioat); - else { - u64 status = ioat_chansts(chan); - - /* manually update the last completion address */ - if (ioat_chansts_to_addr(status) != 0) - *chan->completion = status; - - set_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + /* cleanup super extended descriptors */ + if (desc->sed) { + ioat_free_sed(ioat_dma, desc->sed); + desc->sed = NULL; } - spin_unlock_bh(&ioat->desc_lock); } - spin_unlock_bh(&chan->cleanup_lock); -} - -enum dma_status -ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, - struct dma_tx_state *txstate) -{ - struct ioat_chan_common *chan = to_chan_common(c); - struct ioatdma_device *device = chan->device; - enum dma_status ret; - ret = dma_cookie_status(c, cookie, txstate); - if (ret == DMA_COMPLETE) - return ret; + /* finish all descriptor reads before incrementing tail */ + smp_mb(); + ioat_chan->tail = idx + i; + /* no active descs have written a completion? */ + BUG_ON(active && !seen_current); + ioat_chan->last_completion = phys_complete; - device->cleanup_fn((unsigned long) c); + if (active - i == 0) { + dev_dbg(to_dev(ioat_chan), "%s: cancel completion timeout\n", + __func__); + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + } - return dma_cookie_status(c, cookie, txstate); + /* 5 microsecond delay per pending descriptor */ + writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK), + ioat_chan->ioat_dma->reg_base + IOAT_INTRDELAY_OFFSET); } -static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat) +static void ioat_cleanup(struct ioatdma_chan *ioat_chan) { - struct ioat_chan_common *chan = &ioat->base; - struct ioat_desc_sw *desc; - struct ioat_dma_descriptor *hw; + u64 phys_complete; - spin_lock_bh(&ioat->desc_lock); + spin_lock_bh(&ioat_chan->cleanup_lock); - desc = ioat1_dma_get_next_descriptor(ioat); + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); - if (!desc) { - dev_err(to_dev(chan), - "Unable to start null desc - get next desc failed\n"); - spin_unlock_bh(&ioat->desc_lock); - return; - } + if (is_ioat_halted(*ioat_chan->completion)) { + u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - hw = desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = 1; - hw->ctl_f.compl_write = 1; - /* set size to non-zero value (channel returns error when size is 0) */ - hw->size = NULL_DESC_BUFFER_SIZE; - hw->src_addr = 0; - hw->dst_addr = 0; - async_tx_ack(&desc->txd); - hw->next = 0; - list_add_tail(&desc->node, &ioat->used_desc); - dump_desc_dbg(ioat, desc); + if (chanerr & IOAT_CHANERR_HANDLE_MASK) { + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + ioat_eh(ioat_chan); + } + } - ioat_set_chainaddr(ioat, desc->txd.phys); - ioat_start(chan); - spin_unlock_bh(&ioat->desc_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); } -/* - * Perform a IOAT transaction to verify the HW works. - */ -#define IOAT_TEST_SIZE 2000 +void ioat_cleanup_event(unsigned long data) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan((void *)data); + + ioat_cleanup(ioat_chan); + if (!test_bit(IOAT_RUN, &ioat_chan->state)) + return; + writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); +} -static void ioat_dma_test_callback(void *dma_async_param) +static void ioat_restart_channel(struct ioatdma_chan *ioat_chan) { - struct completion *cmp = dma_async_param; + u64 phys_complete; + + ioat_quiesce(ioat_chan, 0); + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); - complete(cmp); + __ioat_restart_chan(ioat_chan); } -/** - * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. - * @device: device to be tested - */ -int ioat_dma_self_test(struct ioatdma_device *device) +static void ioat_eh(struct ioatdma_chan *ioat_chan) { - int i; - u8 *src; - u8 *dest; - struct dma_device *dma = &device->common; - struct device *dev = &device->pdev->dev; - struct dma_chan *dma_chan; + struct pci_dev *pdev = to_pdev(ioat_chan); + struct ioat_dma_descriptor *hw; struct dma_async_tx_descriptor *tx; - dma_addr_t dma_dest, dma_src; - dma_cookie_t cookie; - int err = 0; - struct completion cmp; - unsigned long tmo; - unsigned long flags; - - src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); - if (!src) - return -ENOMEM; - dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); - if (!dest) { - kfree(src); - return -ENOMEM; - } + u64 phys_complete; + struct ioat_ring_ent *desc; + u32 err_handled = 0; + u32 chanerr_int; + u32 chanerr; - /* Fill in src buffer */ - for (i = 0; i < IOAT_TEST_SIZE; i++) - src[i] = (u8)i; - - /* Start copy, using first DMA channel */ - dma_chan = container_of(dma->channels.next, struct dma_chan, - device_node); - if (dma->device_alloc_chan_resources(dma_chan) < 1) { - dev_err(dev, "selftest cannot allocate chan resource\n"); - err = -ENODEV; - goto out; - } + /* cleanup so tail points to descriptor that caused the error */ + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); - dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_src)) { - dev_err(dev, "mapping src buffer failed\n"); - goto free_resources; - } - dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(dev, dma_dest)) { - dev_err(dev, "mapping dest buffer failed\n"); - goto unmap_src; - } - flags = DMA_PREP_INTERRUPT; - tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, - IOAT_TEST_SIZE, flags); - if (!tx) { - dev_err(dev, "Self-test prep failed, disabling\n"); - err = -ENODEV; - goto unmap_dma; - } + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test setup failed, disabling\n"); - err = -ENODEV; - goto unmap_dma; - } - dma->device_issue_pending(dma_chan); + dev_dbg(to_dev(ioat_chan), "%s: error = %x:%x\n", + __func__, chanerr, chanerr_int); - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail); + hw = desc->hw; + dump_desc_dbg(ioat_chan, desc); - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) - != DMA_COMPLETE) { - dev_err(dev, "Self-test copy timed out, disabling\n"); - err = -ENODEV; - goto unmap_dma; - } - if (memcmp(src, dest, IOAT_TEST_SIZE)) { - dev_err(dev, "Self-test copy failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; + switch (hw->ctl_f.op) { + case IOAT_OP_XOR_VAL: + if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { + *desc->result |= SUM_CHECK_P_RESULT; + err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; + } + break; + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: + if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { + *desc->result |= SUM_CHECK_P_RESULT; + err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; + } + if (chanerr & IOAT_CHANERR_XOR_Q_ERR) { + *desc->result |= SUM_CHECK_Q_RESULT; + err_handled |= IOAT_CHANERR_XOR_Q_ERR; + } + break; } -unmap_dma: - dma_unmap_single(dev, dma_dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); -unmap_src: - dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE); -free_resources: - dma->device_free_chan_resources(dma_chan); -out: - kfree(src); - kfree(dest); - return err; -} - -static char ioat_interrupt_style[32] = "msix"; -module_param_string(ioat_interrupt_style, ioat_interrupt_style, - sizeof(ioat_interrupt_style), 0644); -MODULE_PARM_DESC(ioat_interrupt_style, - "set ioat interrupt style: msix (default), msi, intx"); - -/** - * ioat_dma_setup_interrupts - setup interrupt handler - * @device: ioat device - */ -int ioat_dma_setup_interrupts(struct ioatdma_device *device) -{ - struct ioat_chan_common *chan; - struct pci_dev *pdev = device->pdev; - struct device *dev = &pdev->dev; - struct msix_entry *msix; - int i, j, msixcnt; - int err = -EINVAL; - u8 intrctrl = 0; - - if (!strcmp(ioat_interrupt_style, "msix")) - goto msix; - if (!strcmp(ioat_interrupt_style, "msi")) - goto msi; - if (!strcmp(ioat_interrupt_style, "intx")) - goto intx; - dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); - goto err_no_irq; - -msix: - /* The number of MSI-X vectors should equal the number of channels */ - msixcnt = device->common.chancnt; - for (i = 0; i < msixcnt; i++) - device->msix_entries[i].entry = i; - - err = pci_enable_msix_exact(pdev, device->msix_entries, msixcnt); - if (err) - goto msi; - - for (i = 0; i < msixcnt; i++) { - msix = &device->msix_entries[i]; - chan = ioat_chan_by_index(device, i); - err = devm_request_irq(dev, msix->vector, - ioat_dma_do_interrupt_msix, 0, - "ioat-msix", chan); - if (err) { - for (j = 0; j < i; j++) { - msix = &device->msix_entries[j]; - chan = ioat_chan_by_index(device, j); - devm_free_irq(dev, msix->vector, chan); + /* fault on unhandled error or spurious halt */ + if (chanerr ^ err_handled || chanerr == 0) { + dev_err(to_dev(ioat_chan), "%s: fatal error (%x:%x)\n", + __func__, chanerr, err_handled); + BUG(); + } else { /* cleanup the faulty descriptor */ + tx = &desc->txd; + if (tx->cookie) { + dma_cookie_complete(tx); + dma_descriptor_unmap(tx); + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; } - goto msi; } } - intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; - device->irq_mode = IOAT_MSIX; - goto done; -msi: - err = pci_enable_msi(pdev); - if (err) - goto intx; + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int); - err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, - "ioat-msi", device); - if (err) { - pci_disable_msi(pdev); - goto intx; - } - device->irq_mode = IOAT_MSI; - goto done; - -intx: - err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, - IRQF_SHARED, "ioat-intx", device); - if (err) - goto err_no_irq; - - device->irq_mode = IOAT_INTX; -done: - if (device->intr_quirk) - device->intr_quirk(device); - intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; - writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); - return 0; - -err_no_irq: - /* Disable all interrupt generation */ - writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); - device->irq_mode = IOAT_NOIRQ; - dev_err(dev, "no usable interrupts\n"); - return err; -} -EXPORT_SYMBOL(ioat_dma_setup_interrupts); + /* mark faulting descriptor as complete */ + *ioat_chan->completion = desc->txd.phys; -static void ioat_disable_interrupts(struct ioatdma_device *device) -{ - /* Disable all interrupt generation */ - writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); + spin_lock_bh(&ioat_chan->prep_lock); + ioat_restart_channel(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); } -int ioat_probe(struct ioatdma_device *device) +static void check_active(struct ioatdma_chan *ioat_chan) { - int err = -ENODEV; - struct dma_device *dma = &device->common; - struct pci_dev *pdev = device->pdev; - struct device *dev = &pdev->dev; - - /* DMA coherent memory pool for DMA descriptor allocations */ - device->dma_pool = pci_pool_create("dma_desc_pool", pdev, - sizeof(struct ioat_dma_descriptor), - 64, 0); - if (!device->dma_pool) { - err = -ENOMEM; - goto err_dma_pool; - } - - device->completion_pool = pci_pool_create("completion_pool", pdev, - sizeof(u64), SMP_CACHE_BYTES, - SMP_CACHE_BYTES); - - if (!device->completion_pool) { - err = -ENOMEM; - goto err_completion_pool; + if (ioat_ring_active(ioat_chan)) { + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + return; } - device->enumerate_channels(device); - - dma_cap_set(DMA_MEMCPY, dma->cap_mask); - dma->dev = &pdev->dev; + if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state)) + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + else if (ioat_chan->alloc_order > ioat_get_alloc_order()) { + /* if the ring is idle, empty, and oversized try to step + * down the size + */ + reshape_ring(ioat_chan, ioat_chan->alloc_order - 1); - if (!dma->chancnt) { - dev_err(dev, "channel enumeration error\n"); - goto err_setup_interrupts; + /* keep shrinking until we get back to our minimum + * default size + */ + if (ioat_chan->alloc_order > ioat_get_alloc_order()) + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); } - err = ioat_dma_setup_interrupts(device); - if (err) - goto err_setup_interrupts; +} - err = device->self_test(device); - if (err) - goto err_self_test; +void ioat_timer_event(unsigned long data) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan((void *)data); + dma_addr_t phys_complete; + u64 status; - return 0; + status = ioat_chansts(ioat_chan); -err_self_test: - ioat_disable_interrupts(device); -err_setup_interrupts: - pci_pool_destroy(device->completion_pool); -err_completion_pool: - pci_pool_destroy(device->dma_pool); -err_dma_pool: - return err; -} + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; -int ioat_register(struct ioatdma_device *device) -{ - int err = dma_async_device_register(&device->common); + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(ioat_chan), "%s: Channel halted (%x)\n", + __func__, chanerr); + if (test_bit(IOAT_RUN, &ioat_chan->state)) + BUG_ON(is_ioat_bug(chanerr)); + else /* we never got off the ground */ + return; + } - if (err) { - ioat_disable_interrupts(device); - pci_pool_destroy(device->completion_pool); - pci_pool_destroy(device->dma_pool); + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + spin_lock_bh(&ioat_chan->cleanup_lock); + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &ioat_chan->state)) { + spin_lock_bh(&ioat_chan->prep_lock); + ioat_restart_channel(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + return; + } else { + set_bit(IOAT_COMPLETION_ACK, &ioat_chan->state); + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); } - return err; -} -/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */ -static void ioat1_intr_quirk(struct ioatdma_device *device) -{ - struct pci_dev *pdev = device->pdev; - u32 dmactrl; - - pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl); - if (pdev->msi_enabled) - dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; - else - dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN; - pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); + if (ioat_ring_active(ioat_chan)) + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + else { + spin_lock_bh(&ioat_chan->prep_lock); + check_active(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); + } + spin_unlock_bh(&ioat_chan->cleanup_lock); } -static ssize_t ring_size_show(struct dma_chan *c, char *page) +enum dma_status +ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate) { - struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + enum dma_status ret; - return sprintf(page, "%d\n", ioat->desccount); -} -static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + ret = dma_cookie_status(c, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; -static ssize_t ring_active_show(struct dma_chan *c, char *page) -{ - struct ioat_dma_chan *ioat = to_ioat_chan(c); + ioat_cleanup(ioat_chan); - return sprintf(page, "%d\n", ioat->active); + return dma_cookie_status(c, cookie, txstate); } -static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); -static ssize_t cap_show(struct dma_chan *c, char *page) +static int ioat_irq_reinit(struct ioatdma_device *ioat_dma) { - struct dma_device *dma = c->device; + struct pci_dev *pdev = ioat_dma->pdev; + int irq = pdev->irq, i; - return sprintf(page, "copy%s%s%s%s%s\n", - dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", - dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", - dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", - dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", - dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : ""); + if (!is_bwd_ioat(pdev)) + return 0; -} -struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); - -static ssize_t version_show(struct dma_chan *c, char *page) -{ - struct dma_device *dma = c->device; - struct ioatdma_device *device = to_ioatdma_device(dma); + switch (ioat_dma->irq_mode) { + case IOAT_MSIX: + for (i = 0; i < ioat_dma->dma_dev.chancnt; i++) { + struct msix_entry *msix = &ioat_dma->msix_entries[i]; + struct ioatdma_chan *ioat_chan; - return sprintf(page, "%d.%d\n", - device->version >> 4, device->version & 0xf); -} -struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); - -static struct attribute *ioat1_attrs[] = { - &ring_size_attr.attr, - &ring_active_attr.attr, - &ioat_cap_attr.attr, - &ioat_version_attr.attr, - NULL, -}; - -static ssize_t -ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) -{ - struct ioat_sysfs_entry *entry; - struct ioat_chan_common *chan; + ioat_chan = ioat_chan_by_index(ioat_dma, i); + devm_free_irq(&pdev->dev, msix->vector, ioat_chan); + } - entry = container_of(attr, struct ioat_sysfs_entry, attr); - chan = container_of(kobj, struct ioat_chan_common, kobj); + pci_disable_msix(pdev); + break; + case IOAT_MSI: + pci_disable_msi(pdev); + /* fall through */ + case IOAT_INTX: + devm_free_irq(&pdev->dev, irq, ioat_dma); + break; + default: + return 0; + } + ioat_dma->irq_mode = IOAT_NOIRQ; - if (!entry->show) - return -EIO; - return entry->show(&chan->common, page); + return ioat_dma_setup_interrupts(ioat_dma); } -const struct sysfs_ops ioat_sysfs_ops = { - .show = ioat_attr_show, -}; - -static struct kobj_type ioat1_ktype = { - .sysfs_ops = &ioat_sysfs_ops, - .default_attrs = ioat1_attrs, -}; - -void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type) +int ioat_reset_hw(struct ioatdma_chan *ioat_chan) { - struct dma_device *dma = &device->common; - struct dma_chan *c; + /* throw away whatever the channel was doing and get it + * initialized, with ioat3 specific workarounds + */ + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct pci_dev *pdev = ioat_dma->pdev; + u32 chanerr; + u16 dev_id; + int err; + + ioat_quiesce(ioat_chan, msecs_to_jiffies(100)); - list_for_each_entry(c, &dma->channels, device_node) { - struct ioat_chan_common *chan = to_chan_common(c); - struct kobject *parent = &c->dev->device.kobj; - int err; + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata"); + if (ioat_dma->version < IOAT_VER_3_3) { + /* clear any pending errors */ + err = pci_read_config_dword(pdev, + IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); if (err) { - dev_warn(to_dev(chan), - "sysfs init error (%d), continuing...\n", err); - kobject_put(&chan->kobj); - set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state); + dev_err(&pdev->dev, + "channel error register unreachable\n"); + return err; } - } -} + pci_write_config_dword(pdev, + IOAT_PCI_CHANERR_INT_OFFSET, chanerr); -void ioat_kobject_del(struct ioatdma_device *device) -{ - struct dma_device *dma = &device->common; - struct dma_chan *c; - - list_for_each_entry(c, &dma->channels, device_node) { - struct ioat_chan_common *chan = to_chan_common(c); - - if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) { - kobject_del(&chan->kobj); - kobject_put(&chan->kobj); + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { + pci_write_config_dword(pdev, + IOAT_PCI_DMAUNCERRSTS_OFFSET, + 0x10); } } -} -int ioat1_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - int err; + err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200)); + if (!err) + err = ioat_irq_reinit(ioat_dma); - device->intr_quirk = ioat1_intr_quirk; - device->enumerate_channels = ioat1_enumerate_channels; - device->self_test = ioat_dma_self_test; - device->timer_fn = ioat1_timer_event; - device->cleanup_fn = ioat1_cleanup_event; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; - dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; - dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources; - dma->device_free_chan_resources = ioat1_dma_free_chan_resources; - dma->device_tx_status = ioat_dma_tx_status; - - err = ioat_probe(device); - if (err) - return err; - err = ioat_register(device); if (err) - return err; - ioat_kobject_add(device, &ioat1_ktype); - - if (dca) - device->dca = ioat_dca_init(pdev, device->reg_base); + dev_err(&pdev->dev, "Failed to reset: %d\n", err); return err; } - -void ioat_dma_remove(struct ioatdma_device *device) -{ - struct dma_device *dma = &device->common; - - ioat_disable_interrupts(device); - - ioat_kobject_del(device); - - dma_async_device_unregister(dma); - - pci_pool_destroy(device->dma_pool); - pci_pool_destroy(device->completion_pool); - - INIT_LIST_HEAD(&dma->channels); -} diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 30f5c7eede16..1bc084986646 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -18,26 +18,32 @@ #define IOATDMA_H #include <linux/dmaengine.h> -#include "hw.h" -#include "registers.h" #include <linux/init.h> #include <linux/dmapool.h> #include <linux/cache.h> #include <linux/pci_ids.h> -#include <net/tcp.h> +#include <linux/circ_buf.h> +#include <linux/interrupt.h> +#include "registers.h" +#include "hw.h" #define IOAT_DMA_VERSION "4.00" -#define IOAT_LOW_COMPLETION_MASK 0xffffffc0 #define IOAT_DMA_DCA_ANY_CPU ~0 -#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) -#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) -#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd) -#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev) -#define to_pdev(ioat_chan) ((ioat_chan)->device->pdev) +#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, dma_dev) +#define to_dev(ioat_chan) (&(ioat_chan)->ioat_dma->pdev->dev) +#define to_pdev(ioat_chan) ((ioat_chan)->ioat_dma->pdev) + +#define chan_num(ch) ((int)((ch)->reg_base - (ch)->ioat_dma->reg_base) / 0x80) -#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) +/* ioat hardware assumes at least two sources for raid operations */ +#define src_cnt_to_sw(x) ((x) + 2) +#define src_cnt_to_hw(x) ((x) - 2) +#define ndest_to_sw(x) ((x) + 1) +#define ndest_to_hw(x) ((x) - 1) +#define src16_cnt_to_sw(x) ((x) + 9) +#define src16_cnt_to_hw(x) ((x) - 9) /* * workaround for IOAT ver.3.0 null descriptor issue @@ -57,19 +63,15 @@ enum ioat_irq_mode { * @pdev: PCI-Express device * @reg_base: MMIO register space base address * @dma_pool: for allocating DMA descriptors - * @common: embedded struct dma_device + * @completion_pool: DMA buffers for completion ops + * @sed_hw_pool: DMA super descriptor pools + * @dma_dev: embedded struct dma_device * @version: version of ioatdma device * @msix_entries: irq handlers * @idx: per channel data * @dca: direct cache access context - * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) - * @enumerate_channels: hw version specific channel enumeration - * @reset_hw: hw version specific channel (re)initialization - * @cleanup_fn: select between the v2 and v3 cleanup routines - * @timer_fn: select between the v2 and v3 timer watchdog routines - * @self_test: hardware version specific self test for each supported op type - * - * Note: the v3 cleanup routine supports raid operations + * @irq_mode: interrupt mode (INTX, MSI, MSIX) + * @cap: read DMA capabilities register */ struct ioatdma_device { struct pci_dev *pdev; @@ -78,28 +80,21 @@ struct ioatdma_device { struct pci_pool *completion_pool; #define MAX_SED_POOLS 5 struct dma_pool *sed_hw_pool[MAX_SED_POOLS]; - struct dma_device common; + struct dma_device dma_dev; u8 version; struct msix_entry msix_entries[4]; - struct ioat_chan_common *idx[4]; + struct ioatdma_chan *idx[4]; struct dca_provider *dca; enum ioat_irq_mode irq_mode; u32 cap; - void (*intr_quirk)(struct ioatdma_device *device); - int (*enumerate_channels)(struct ioatdma_device *device); - int (*reset_hw)(struct ioat_chan_common *chan); - void (*cleanup_fn)(unsigned long data); - void (*timer_fn)(unsigned long data); - int (*self_test)(struct ioatdma_device *device); }; -struct ioat_chan_common { - struct dma_chan common; +struct ioatdma_chan { + struct dma_chan dma_chan; void __iomem *reg_base; dma_addr_t last_completion; spinlock_t cleanup_lock; unsigned long state; - #define IOAT_COMPLETION_PENDING 0 #define IOAT_COMPLETION_ACK 1 #define IOAT_RESET_PENDING 2 #define IOAT_KOBJ_INIT_FAIL 3 @@ -110,11 +105,32 @@ struct ioat_chan_common { #define COMPLETION_TIMEOUT msecs_to_jiffies(100) #define IDLE_TIMEOUT msecs_to_jiffies(2000) #define RESET_DELAY msecs_to_jiffies(100) - struct ioatdma_device *device; + struct ioatdma_device *ioat_dma; dma_addr_t completion_dma; u64 *completion; struct tasklet_struct cleanup_task; struct kobject kobj; + +/* ioat v2 / v3 channel attributes + * @xfercap_log; log2 of channel max transfer length (for fast division) + * @head: allocated index + * @issued: hardware notification point + * @tail: cleanup index + * @dmacount: identical to 'head' except for occasionally resetting to zero + * @alloc_order: log2 of the number of allocated descriptors + * @produce: number of descriptors to produce at submit time + * @ring: software ring buffer implementation of hardware ring + * @prep_lock: serializes descriptor preparation (producers) + */ + size_t xfercap_log; + u16 head; + u16 issued; + u16 tail; + u16 dmacount; + u16 alloc_order; + u16 produce; + struct ioat_ring_ent **ring; + spinlock_t prep_lock; }; struct ioat_sysfs_entry { @@ -123,28 +139,11 @@ struct ioat_sysfs_entry { }; /** - * struct ioat_dma_chan - internal representation of a DMA channel - */ -struct ioat_dma_chan { - struct ioat_chan_common base; - - size_t xfercap; /* XFERCAP register value expanded out */ - - spinlock_t desc_lock; - struct list_head free_desc; - struct list_head used_desc; - - int pending; - u16 desccount; - u16 active; -}; - -/** * struct ioat_sed_ent - wrapper around super extended hardware descriptor * @hw: hardware SED - * @sed_dma: dma address for the SED - * @list: list member + * @dma: dma address for the SED * @parent: point to the dma descriptor that's the parent + * @hw_pool: descriptor pool index */ struct ioat_sed_ent { struct ioat_sed_raw_descriptor *hw; @@ -153,39 +152,57 @@ struct ioat_sed_ent { unsigned int hw_pool; }; -static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) -{ - return container_of(c, struct ioat_chan_common, common); -} - -static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c) -{ - struct ioat_chan_common *chan = to_chan_common(c); - - return container_of(chan, struct ioat_dma_chan, base); -} - -/* wrapper around hardware descriptor format + additional software fields */ - /** - * struct ioat_desc_sw - wrapper around hardware descriptor + * struct ioat_ring_ent - wrapper around hardware descriptor * @hw: hardware DMA descriptor (for memcpy) - * @node: this descriptor will either be on the free list, - * or attached to a transaction list (tx_list) + * @xor: hardware xor descriptor + * @xor_ex: hardware xor extension descriptor + * @pq: hardware pq descriptor + * @pq_ex: hardware pq extension descriptor + * @pqu: hardware pq update descriptor + * @raw: hardware raw (un-typed) descriptor * @txd: the generic software descriptor for all engines + * @len: total transaction length for unmap + * @result: asynchronous result of validate operations * @id: identifier for debug + * @sed: pointer to super extended descriptor sw desc */ -struct ioat_desc_sw { - struct ioat_dma_descriptor *hw; - struct list_head node; + +struct ioat_ring_ent { + union { + struct ioat_dma_descriptor *hw; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex; + struct ioat_pq_update_descriptor *pqu; + struct ioat_raw_descriptor *raw; + }; size_t len; - struct list_head tx_list; struct dma_async_tx_descriptor txd; + enum sum_check_flags *result; #ifdef DEBUG int id; #endif + struct ioat_sed_ent *sed; }; +extern const struct sysfs_ops ioat_sysfs_ops; +extern struct ioat_sysfs_entry ioat_version_attr; +extern struct ioat_sysfs_entry ioat_cap_attr; +extern int ioat_pending_level; +extern int ioat_ring_alloc_order; +extern struct kobj_type ioat_ktype; +extern struct kmem_cache *ioat_cache; +extern int ioat_ring_max_alloc_order; +extern struct kmem_cache *ioat_sed_cache; + +static inline struct ioatdma_chan *to_ioat_chan(struct dma_chan *c) +{ + return container_of(c, struct ioatdma_chan, dma_chan); +} + +/* wrapper around hardware descriptor format + additional software fields */ #ifdef DEBUG #define set_desc_id(desc, i) ((desc)->id = (i)) #define desc_id(desc) ((desc)->id) @@ -195,10 +212,10 @@ struct ioat_desc_sw { #endif static inline void -__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, +__dump_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_dma_descriptor *hw, struct dma_async_tx_descriptor *tx, int id) { - struct device *dev = to_dev(chan); + struct device *dev = to_dev(ioat_chan); dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x" " ctl: %#10.8x (op: %#x int_en: %d compl: %d)\n", id, @@ -208,25 +225,25 @@ __dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, } #define dump_desc_dbg(c, d) \ - ({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; }) + ({ if (d) __dump_desc_dbg(c, d->hw, &d->txd, desc_id(d)); 0; }) -static inline struct ioat_chan_common * -ioat_chan_by_index(struct ioatdma_device *device, int index) +static inline struct ioatdma_chan * +ioat_chan_by_index(struct ioatdma_device *ioat_dma, int index) { - return device->idx[index]; + return ioat_dma->idx[index]; } -static inline u64 ioat_chansts_32(struct ioat_chan_common *chan) +static inline u64 ioat_chansts_32(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; u64 status; u32 status_lo; /* We need to read the low address first as this causes the * chipset to latch the upper bits for the subsequent read */ - status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver)); - status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver)); + status_lo = readl(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver)); + status = readl(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver)); status <<= 32; status |= status_lo; @@ -235,16 +252,16 @@ static inline u64 ioat_chansts_32(struct ioat_chan_common *chan) #if BITS_PER_LONG == 64 -static inline u64 ioat_chansts(struct ioat_chan_common *chan) +static inline u64 ioat_chansts(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; u64 status; /* With IOAT v3.3 the status register is 64bit. */ if (ver >= IOAT_VER_3_3) - status = readq(chan->reg_base + IOAT_CHANSTS_OFFSET(ver)); + status = readq(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET(ver)); else - status = ioat_chansts_32(chan); + status = ioat_chansts_32(ioat_chan); return status; } @@ -253,56 +270,41 @@ static inline u64 ioat_chansts(struct ioat_chan_common *chan) #define ioat_chansts ioat_chansts_32 #endif -static inline void ioat_start(struct ioat_chan_common *chan) -{ - u8 ver = chan->device->version; - - writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); -} - static inline u64 ioat_chansts_to_addr(u64 status) { return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; } -static inline u32 ioat_chanerr(struct ioat_chan_common *chan) +static inline u32 ioat_chanerr(struct ioatdma_chan *ioat_chan) { - return readl(chan->reg_base + IOAT_CHANERR_OFFSET); + return readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); } -static inline void ioat_suspend(struct ioat_chan_common *chan) +static inline void ioat_suspend(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; - writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + writeb(IOAT_CHANCMD_SUSPEND, + ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); } -static inline void ioat_reset(struct ioat_chan_common *chan) +static inline void ioat_reset(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; - writeb(IOAT_CHANCMD_RESET, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + writeb(IOAT_CHANCMD_RESET, + ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); } -static inline bool ioat_reset_pending(struct ioat_chan_common *chan) +static inline bool ioat_reset_pending(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; u8 cmd; - cmd = readb(chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + cmd = readb(ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET; } -static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr) -{ - struct ioat_chan_common *chan = &ioat->base; - - writel(addr & 0x00000000FFFFFFFF, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(addr >> 32, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); -} - static inline bool is_ioat_active(unsigned long status) { return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE); @@ -329,24 +331,111 @@ static inline bool is_ioat_bug(unsigned long err) return !!err; } -int ioat_probe(struct ioatdma_device *device); -int ioat_register(struct ioatdma_device *device); -int ioat1_dma_probe(struct ioatdma_device *dev, int dca); -int ioat_dma_self_test(struct ioatdma_device *device); -void ioat_dma_remove(struct ioatdma_device *device); +#define IOAT_MAX_ORDER 16 +#define ioat_get_alloc_order() \ + (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) +#define ioat_get_max_alloc_order() \ + (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) + +static inline u32 ioat_ring_size(struct ioatdma_chan *ioat_chan) +{ + return 1 << ioat_chan->alloc_order; +} + +/* count of descriptors in flight with the engine */ +static inline u16 ioat_ring_active(struct ioatdma_chan *ioat_chan) +{ + return CIRC_CNT(ioat_chan->head, ioat_chan->tail, + ioat_ring_size(ioat_chan)); +} + +/* count of descriptors pending submission to hardware */ +static inline u16 ioat_ring_pending(struct ioatdma_chan *ioat_chan) +{ + return CIRC_CNT(ioat_chan->head, ioat_chan->issued, + ioat_ring_size(ioat_chan)); +} + +static inline u32 ioat_ring_space(struct ioatdma_chan *ioat_chan) +{ + return ioat_ring_size(ioat_chan) - ioat_ring_active(ioat_chan); +} + +static inline u16 +ioat_xferlen_to_descs(struct ioatdma_chan *ioat_chan, size_t len) +{ + u16 num_descs = len >> ioat_chan->xfercap_log; + + num_descs += !!(len & ((1 << ioat_chan->xfercap_log) - 1)); + return num_descs; +} + +static inline struct ioat_ring_ent * +ioat_get_ring_ent(struct ioatdma_chan *ioat_chan, u16 idx) +{ + return ioat_chan->ring[idx & (ioat_ring_size(ioat_chan) - 1)]; +} + +static inline void +ioat_set_chainaddr(struct ioatdma_chan *ioat_chan, u64 addr) +{ + writel(addr & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(addr >> 32, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); +} + +/* IOAT Prep functions */ +struct dma_async_tx_descriptor * +ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags); + +/* IOAT Operation functions */ +irqreturn_t ioat_dma_do_interrupt(int irq, void *data); +irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data); +struct ioat_ring_ent ** +ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags); +void ioat_start_null_desc(struct ioatdma_chan *ioat_chan); +void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan); +int ioat_reset_hw(struct ioatdma_chan *ioat_chan); +enum dma_status +ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate); +void ioat_cleanup_event(unsigned long data); +void ioat_timer_event(unsigned long data); +int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs); +void ioat_issue_pending(struct dma_chan *chan); +void ioat_timer_event(unsigned long data); + +/* IOAT Init functions */ +bool is_bwd_ioat(struct pci_dev *pdev); struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); -dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan); -void ioat_init_channel(struct ioatdma_device *device, - struct ioat_chan_common *chan, int idx); -enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, - struct dma_tx_state *txstate); -bool ioat_cleanup_preamble(struct ioat_chan_common *chan, - dma_addr_t *phys_complete); -void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); -void ioat_kobject_del(struct ioatdma_device *device); -int ioat_dma_setup_interrupts(struct ioatdma_device *device); -void ioat_stop(struct ioat_chan_common *chan); -extern const struct sysfs_ops ioat_sysfs_ops; -extern struct ioat_sysfs_entry ioat_version_attr; -extern struct ioat_sysfs_entry ioat_cap_attr; +void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type); +void ioat_kobject_del(struct ioatdma_device *ioat_dma); +int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma); +void ioat_stop(struct ioatdma_chan *ioat_chan); #endif /* IOATDMA_H */ diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c deleted file mode 100644 index 69c7dfcad023..000000000000 --- a/drivers/dma/ioat/dma_v2.c +++ /dev/null @@ -1,916 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2004 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -/* - * This driver supports an Intel I/OAT DMA engine (versions >= 2), which - * does asynchronous data movement and checksumming operations. - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/dmaengine.h> -#include <linux/delay.h> -#include <linux/dma-mapping.h> -#include <linux/workqueue.h> -#include <linux/prefetch.h> -#include <linux/i7300_idle.h> -#include "dma.h" -#include "dma_v2.h" -#include "registers.h" -#include "hw.h" - -#include "../dmaengine.h" - -int ioat_ring_alloc_order = 8; -module_param(ioat_ring_alloc_order, int, 0644); -MODULE_PARM_DESC(ioat_ring_alloc_order, - "ioat2+: allocate 2^n descriptors per channel" - " (default: 8 max: 16)"); -static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; -module_param(ioat_ring_max_alloc_order, int, 0644); -MODULE_PARM_DESC(ioat_ring_max_alloc_order, - "ioat2+: upper limit for ring size (default: 16)"); - -void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - ioat->dmacount += ioat2_ring_pending(ioat); - ioat->issued = ioat->head; - writew(ioat->dmacount, chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); - dev_dbg(to_dev(chan), - "%s: head: %#x tail: %#x issued: %#x count: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); -} - -void ioat2_issue_pending(struct dma_chan *c) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - - if (ioat2_ring_pending(ioat)) { - spin_lock_bh(&ioat->prep_lock); - __ioat2_issue_pending(ioat); - spin_unlock_bh(&ioat->prep_lock); - } -} - -/** - * ioat2_update_pending - log pending descriptors - * @ioat: ioat2+ channel - * - * Check if the number of unsubmitted descriptors has exceeded the - * watermark. Called with prep_lock held - */ -static void ioat2_update_pending(struct ioat2_dma_chan *ioat) -{ - if (ioat2_ring_pending(ioat) > ioat_pending_level) - __ioat2_issue_pending(ioat); -} - -static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) -{ - struct ioat_ring_ent *desc; - struct ioat_dma_descriptor *hw; - - if (ioat2_ring_space(ioat) < 1) { - dev_err(to_dev(&ioat->base), - "Unable to start null desc - ring full\n"); - return; - } - - dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued); - desc = ioat2_get_ring_ent(ioat, ioat->head); - - hw = desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = 1; - hw->ctl_f.compl_write = 1; - /* set size to non-zero value (channel returns error when size is 0) */ - hw->size = NULL_DESC_BUFFER_SIZE; - hw->src_addr = 0; - hw->dst_addr = 0; - async_tx_ack(&desc->txd); - ioat2_set_chainaddr(ioat, desc->txd.phys); - dump_desc_dbg(ioat, desc); - wmb(); - ioat->head += 1; - __ioat2_issue_pending(ioat); -} - -static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat) -{ - spin_lock_bh(&ioat->prep_lock); - __ioat2_start_null_desc(ioat); - spin_unlock_bh(&ioat->prep_lock); -} - -static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) -{ - struct ioat_chan_common *chan = &ioat->base; - struct dma_async_tx_descriptor *tx; - struct ioat_ring_ent *desc; - bool seen_current = false; - u16 active; - int idx = ioat->tail, i; - - dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued); - - active = ioat2_ring_active(ioat); - for (i = 0; i < active && !seen_current; i++) { - smp_read_barrier_depends(); - prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); - desc = ioat2_get_ring_ent(ioat, idx + i); - tx = &desc->txd; - dump_desc_dbg(ioat, desc); - if (tx->cookie) { - dma_descriptor_unmap(tx); - dma_cookie_complete(tx); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } - - if (tx->phys == phys_complete) - seen_current = true; - } - smp_mb(); /* finish all descriptor reads before incrementing tail */ - ioat->tail = idx + i; - BUG_ON(active && !seen_current); /* no active descs have written a completion? */ - - chan->last_completion = phys_complete; - if (active - i == 0) { - dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", - __func__); - clear_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } -} - -/** - * ioat2_cleanup - clean finished descriptors (advance tail pointer) - * @chan: ioat channel to be cleaned up - */ -static void ioat2_cleanup(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - - spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - spin_unlock_bh(&chan->cleanup_lock); -} - -void ioat2_cleanup_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - - ioat2_cleanup(ioat); - if (!test_bit(IOAT_RUN, &chan->state)) - return; - writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); -} - -void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - /* set the tail to be re-issued */ - ioat->issued = ioat->tail; - ioat->dmacount = 0; - set_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - - dev_dbg(to_dev(chan), - "%s: head: %#x tail: %#x issued: %#x count: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); - - if (ioat2_ring_pending(ioat)) { - struct ioat_ring_ent *desc; - - desc = ioat2_get_ring_ent(ioat, ioat->tail); - ioat2_set_chainaddr(ioat, desc->txd.phys); - __ioat2_issue_pending(ioat); - } else - __ioat2_start_null_desc(ioat); -} - -int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo) -{ - unsigned long end = jiffies + tmo; - int err = 0; - u32 status; - - status = ioat_chansts(chan); - if (is_ioat_active(status) || is_ioat_idle(status)) - ioat_suspend(chan); - while (is_ioat_active(status) || is_ioat_idle(status)) { - if (tmo && time_after(jiffies, end)) { - err = -ETIMEDOUT; - break; - } - status = ioat_chansts(chan); - cpu_relax(); - } - - return err; -} - -int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo) -{ - unsigned long end = jiffies + tmo; - int err = 0; - - ioat_reset(chan); - while (ioat_reset_pending(chan)) { - if (end && time_after(jiffies, end)) { - err = -ETIMEDOUT; - break; - } - cpu_relax(); - } - - return err; -} - -static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - - ioat2_quiesce(chan, 0); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - __ioat2_restart_chan(ioat); -} - -static void check_active(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - if (ioat2_ring_active(ioat)) { - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - return; - } - - if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &chan->state)) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - else if (ioat->alloc_order > ioat_get_alloc_order()) { - /* if the ring is idle, empty, and oversized try to step - * down the size - */ - reshape_ring(ioat, ioat->alloc_order - 1); - - /* keep shrinking until we get back to our minimum - * default size - */ - if (ioat->alloc_order > ioat_get_alloc_order()) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } - -} - -void ioat2_timer_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - u64 status; - - status = ioat_chansts(chan); - - /* when halted due to errors check for channel - * programming errors before advancing the completion state - */ - if (is_ioat_halted(status)) { - u32 chanerr; - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - dev_err(to_dev(chan), "%s: Channel halted (%x)\n", - __func__, chanerr); - if (test_bit(IOAT_RUN, &chan->state)) - BUG_ON(is_ioat_bug(chanerr)); - else /* we never got off the ground */ - return; - } - - /* if we haven't made progress and we have already - * acknowledged a pending completion once, then be more - * forceful with a restart - */ - spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { - spin_lock_bh(&ioat->prep_lock); - ioat2_restart_channel(ioat); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - return; - } else { - set_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - } - - - if (ioat2_ring_active(ioat)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - else { - spin_lock_bh(&ioat->prep_lock); - check_active(ioat); - spin_unlock_bh(&ioat->prep_lock); - } - spin_unlock_bh(&chan->cleanup_lock); -} - -static int ioat2_reset_hw(struct ioat_chan_common *chan) -{ - /* throw away whatever the channel was doing and get it initialized */ - u32 chanerr; - - ioat2_quiesce(chan, msecs_to_jiffies(100)); - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - - return ioat2_reset_sync(chan, msecs_to_jiffies(200)); -} - -/** - * ioat2_enumerate_channels - find and initialize the device's channels - * @device: the device to be enumerated - */ -int ioat2_enumerate_channels(struct ioatdma_device *device) -{ - struct ioat2_dma_chan *ioat; - struct device *dev = &device->pdev->dev; - struct dma_device *dma = &device->common; - u8 xfercap_log; - int i; - - INIT_LIST_HEAD(&dma->channels); - dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); - dma->chancnt &= 0x1f; /* bits [4:0] valid */ - if (dma->chancnt > ARRAY_SIZE(device->idx)) { - dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", - dma->chancnt, ARRAY_SIZE(device->idx)); - dma->chancnt = ARRAY_SIZE(device->idx); - } - xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET); - xfercap_log &= 0x1f; /* bits [4:0] valid */ - if (xfercap_log == 0) - return 0; - dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); - - /* FIXME which i/oat version is i7300? */ -#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) - dma->chancnt--; -#endif - for (i = 0; i < dma->chancnt; i++) { - ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); - if (!ioat) - break; - - ioat_init_channel(device, &ioat->base, i); - ioat->xfercap_log = xfercap_log; - spin_lock_init(&ioat->prep_lock); - if (device->reset_hw(&ioat->base)) { - i = 0; - break; - } - } - dma->chancnt = i; - return i; -} - -static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) -{ - struct dma_chan *c = tx->chan; - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - dma_cookie_t cookie; - - cookie = dma_cookie_assign(tx); - dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); - - if (!test_and_set_bit(IOAT_CHAN_ACTIVE, &chan->state)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - - /* make descriptor updates visible before advancing ioat->head, - * this is purposefully not smp_wmb() since we are also - * publishing the descriptor updates to a dma device - */ - wmb(); - - ioat->head += ioat->produce; - - ioat2_update_pending(ioat); - spin_unlock_bh(&ioat->prep_lock); - - return cookie; -} - -static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) -{ - struct ioat_dma_descriptor *hw; - struct ioat_ring_ent *desc; - struct ioatdma_device *dma; - dma_addr_t phys; - - dma = to_ioatdma_device(chan->device); - hw = pci_pool_alloc(dma->dma_pool, flags, &phys); - if (!hw) - return NULL; - memset(hw, 0, sizeof(*hw)); - - desc = kmem_cache_zalloc(ioat2_cache, flags); - if (!desc) { - pci_pool_free(dma->dma_pool, hw, phys); - return NULL; - } - - dma_async_tx_descriptor_init(&desc->txd, chan); - desc->txd.tx_submit = ioat2_tx_submit_unlock; - desc->hw = hw; - desc->txd.phys = phys; - return desc; -} - -static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) -{ - struct ioatdma_device *dma; - - dma = to_ioatdma_device(chan->device); - pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys); - kmem_cache_free(ioat2_cache, desc); -} - -static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags) -{ - struct ioat_ring_ent **ring; - int descs = 1 << order; - int i; - - if (order > ioat_get_max_alloc_order()) - return NULL; - - /* allocate the array to hold the software ring */ - ring = kcalloc(descs, sizeof(*ring), flags); - if (!ring) - return NULL; - for (i = 0; i < descs; i++) { - ring[i] = ioat2_alloc_ring_ent(c, flags); - if (!ring[i]) { - while (i--) - ioat2_free_ring_ent(ring[i], c); - kfree(ring); - return NULL; - } - set_desc_id(ring[i], i); - } - - /* link descs */ - for (i = 0; i < descs-1; i++) { - struct ioat_ring_ent *next = ring[i+1]; - struct ioat_dma_descriptor *hw = ring[i]->hw; - - hw->next = next->txd.phys; - } - ring[i]->hw->next = ring[0]->txd.phys; - - return ring; -} - -void ioat2_free_chan_resources(struct dma_chan *c); - -/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring - * @chan: channel to be initialized - */ -int ioat2_alloc_chan_resources(struct dma_chan *c) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioat_ring_ent **ring; - u64 status; - int order; - int i = 0; - - /* have we already been set up? */ - if (ioat->ring) - return 1 << ioat->alloc_order; - - /* Setup register to interrupt and write completion status on error */ - writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); - - /* allocate a completion writeback area */ - /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ - chan->completion = pci_pool_alloc(chan->device->completion_pool, - GFP_KERNEL, &chan->completion_dma); - if (!chan->completion) - return -ENOMEM; - - memset(chan->completion, 0, sizeof(*chan->completion)); - writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - writel(((u64) chan->completion_dma) >> 32, - chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - - order = ioat_get_alloc_order(); - ring = ioat2_alloc_ring(c, order, GFP_KERNEL); - if (!ring) - return -ENOMEM; - - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->prep_lock); - ioat->ring = ring; - ioat->head = 0; - ioat->issued = 0; - ioat->tail = 0; - ioat->alloc_order = order; - set_bit(IOAT_RUN, &chan->state); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - - ioat2_start_null_desc(ioat); - - /* check that we got off the ground */ - do { - udelay(1); - status = ioat_chansts(chan); - } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status)); - - if (is_ioat_active(status) || is_ioat_idle(status)) { - return 1 << ioat->alloc_order; - } else { - u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - - dev_WARN(to_dev(chan), - "failed to start channel chanerr: %#x\n", chanerr); - ioat2_free_chan_resources(c); - return -EFAULT; - } -} - -bool reshape_ring(struct ioat2_dma_chan *ioat, int order) -{ - /* reshape differs from normal ring allocation in that we want - * to allocate a new software ring while only - * extending/truncating the hardware ring - */ - struct ioat_chan_common *chan = &ioat->base; - struct dma_chan *c = &chan->common; - const u32 curr_size = ioat2_ring_size(ioat); - const u16 active = ioat2_ring_active(ioat); - const u32 new_size = 1 << order; - struct ioat_ring_ent **ring; - u16 i; - - if (order > ioat_get_max_alloc_order()) - return false; - - /* double check that we have at least 1 free descriptor */ - if (active == curr_size) - return false; - - /* when shrinking, verify that we can hold the current active - * set in the new ring - */ - if (active >= new_size) - return false; - - /* allocate the array to hold the software ring */ - ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT); - if (!ring) - return false; - - /* allocate/trim descriptors as needed */ - if (new_size > curr_size) { - /* copy current descriptors to the new ring */ - for (i = 0; i < curr_size; i++) { - u16 curr_idx = (ioat->tail+i) & (curr_size-1); - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ring[new_idx] = ioat->ring[curr_idx]; - set_desc_id(ring[new_idx], new_idx); - } - - /* add new descriptors to the ring */ - for (i = curr_size; i < new_size; i++) { - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT); - if (!ring[new_idx]) { - while (i--) { - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ioat2_free_ring_ent(ring[new_idx], c); - } - kfree(ring); - return false; - } - set_desc_id(ring[new_idx], new_idx); - } - - /* hw link new descriptors */ - for (i = curr_size-1; i < new_size; i++) { - u16 new_idx = (ioat->tail+i) & (new_size-1); - struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)]; - struct ioat_dma_descriptor *hw = ring[new_idx]->hw; - - hw->next = next->txd.phys; - } - } else { - struct ioat_dma_descriptor *hw; - struct ioat_ring_ent *next; - - /* copy current descriptors to the new ring, dropping the - * removed descriptors - */ - for (i = 0; i < new_size; i++) { - u16 curr_idx = (ioat->tail+i) & (curr_size-1); - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ring[new_idx] = ioat->ring[curr_idx]; - set_desc_id(ring[new_idx], new_idx); - } - - /* free deleted descriptors */ - for (i = new_size; i < curr_size; i++) { - struct ioat_ring_ent *ent; - - ent = ioat2_get_ring_ent(ioat, ioat->tail+i); - ioat2_free_ring_ent(ent, c); - } - - /* fix up hardware ring */ - hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw; - next = ring[(ioat->tail+new_size) & (new_size-1)]; - hw->next = next->txd.phys; - } - - dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", - __func__, new_size); - - kfree(ioat->ring); - ioat->ring = ring; - ioat->alloc_order = order; - - return true; -} - -/** - * ioat2_check_space_lock - verify space and grab ring producer lock - * @ioat: ioat2,3 channel (ring) to operate on - * @num_descs: allocation length - */ -int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs) -{ - struct ioat_chan_common *chan = &ioat->base; - bool retry; - - retry: - spin_lock_bh(&ioat->prep_lock); - /* never allow the last descriptor to be consumed, we need at - * least one free at all times to allow for on-the-fly ring - * resizing. - */ - if (likely(ioat2_ring_space(ioat) > num_descs)) { - dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n", - __func__, num_descs, ioat->head, ioat->tail, ioat->issued); - ioat->produce = num_descs; - return 0; /* with ioat->prep_lock held */ - } - retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &chan->state); - spin_unlock_bh(&ioat->prep_lock); - - /* is another cpu already trying to expand the ring? */ - if (retry) - goto retry; - - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->prep_lock); - retry = reshape_ring(ioat, ioat->alloc_order + 1); - clear_bit(IOAT_RESHAPE_PENDING, &chan->state); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - - /* if we were able to expand the ring retry the allocation */ - if (retry) - goto retry; - - if (printk_ratelimit()) - dev_dbg(to_dev(chan), "%s: ring full! num_descs: %d (%x:%x:%x)\n", - __func__, num_descs, ioat->head, ioat->tail, ioat->issued); - - /* progress reclaim in the allocation failure case we may be - * called under bh_disabled so we need to trigger the timer - * event directly - */ - if (time_is_before_jiffies(chan->timer.expires) - && timer_pending(&chan->timer)) { - struct ioatdma_device *device = chan->device; - - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - device->timer_fn((unsigned long) &chan->common); - } - - return -ENOMEM; -} - -struct dma_async_tx_descriptor * -ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_dma_descriptor *hw; - struct ioat_ring_ent *desc; - dma_addr_t dst = dma_dest; - dma_addr_t src = dma_src; - size_t total_len = len; - int num_descs, idx, i; - - num_descs = ioat2_xferlen_to_descs(ioat, len); - if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0) - idx = ioat->head; - else - return NULL; - i = 0; - do { - size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log); - - desc = ioat2_get_ring_ent(ioat, idx + i); - hw = desc->hw; - - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dst; - - len -= copy; - dst += copy; - src += copy; - dump_desc_dbg(ioat, desc); - } while (++i < num_descs); - - desc->txd.flags = flags; - desc->len = total_len; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - hw->ctl_f.compl_write = 1; - dump_desc_dbg(ioat, desc); - /* we leave the channel locked to ensure in order submission */ - - return &desc->txd; -} - -/** - * ioat2_free_chan_resources - release all the descriptors - * @chan: the channel to be cleaned - */ -void ioat2_free_chan_resources(struct dma_chan *c) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *desc; - const u16 total_descs = 1 << ioat->alloc_order; - int descs; - int i; - - /* Before freeing channel resources first check - * if they have been previously allocated for this channel. - */ - if (!ioat->ring) - return; - - ioat_stop(chan); - device->reset_hw(chan); - - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->prep_lock); - descs = ioat2_ring_space(ioat); - dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs); - for (i = 0; i < descs; i++) { - desc = ioat2_get_ring_ent(ioat, ioat->head + i); - ioat2_free_ring_ent(desc, c); - } - - if (descs < total_descs) - dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", - total_descs - descs); - - for (i = 0; i < total_descs - descs; i++) { - desc = ioat2_get_ring_ent(ioat, ioat->tail + i); - dump_desc_dbg(ioat, desc); - ioat2_free_ring_ent(desc, c); - } - - kfree(ioat->ring); - ioat->ring = NULL; - ioat->alloc_order = 0; - pci_pool_free(device->completion_pool, chan->completion, - chan->completion_dma); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - - chan->last_completion = 0; - chan->completion_dma = 0; - ioat->dmacount = 0; -} - -static ssize_t ring_size_show(struct dma_chan *c, char *page) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - - return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1); -} -static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); - -static ssize_t ring_active_show(struct dma_chan *c, char *page) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - - /* ...taken outside the lock, no need to be precise */ - return sprintf(page, "%d\n", ioat2_ring_active(ioat)); -} -static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); - -static struct attribute *ioat2_attrs[] = { - &ring_size_attr.attr, - &ring_active_attr.attr, - &ioat_cap_attr.attr, - &ioat_version_attr.attr, - NULL, -}; - -struct kobj_type ioat2_ktype = { - .sysfs_ops = &ioat_sysfs_ops, - .default_attrs = ioat2_attrs, -}; - -int ioat2_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - int err; - - device->enumerate_channels = ioat2_enumerate_channels; - device->reset_hw = ioat2_reset_hw; - device->cleanup_fn = ioat2_cleanup_event; - device->timer_fn = ioat2_timer_event; - device->self_test = ioat_dma_self_test; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; - dma->device_issue_pending = ioat2_issue_pending; - dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; - dma->device_free_chan_resources = ioat2_free_chan_resources; - dma->device_tx_status = ioat_dma_tx_status; - - err = ioat_probe(device); - if (err) - return err; - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } - - err = ioat_register(device); - if (err) - return err; - - ioat_kobject_add(device, &ioat2_ktype); - - if (dca) - device->dca = ioat2_dca_init(pdev, device->reg_base); - - return err; -} diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h deleted file mode 100644 index bf24ebe874b0..000000000000 --- a/drivers/dma/ioat/dma_v2.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ -#ifndef IOATDMA_V2_H -#define IOATDMA_V2_H - -#include <linux/dmaengine.h> -#include <linux/circ_buf.h> -#include "dma.h" -#include "hw.h" - - -extern int ioat_pending_level; -extern int ioat_ring_alloc_order; - -/* - * workaround for IOAT ver.3.0 null descriptor issue - * (channel returns error when size is 0) - */ -#define NULL_DESC_BUFFER_SIZE 1 - -#define IOAT_MAX_ORDER 16 -#define ioat_get_alloc_order() \ - (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) -#define ioat_get_max_alloc_order() \ - (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) - -/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes - * @base: common ioat channel parameters - * @xfercap_log; log2 of channel max transfer length (for fast division) - * @head: allocated index - * @issued: hardware notification point - * @tail: cleanup index - * @dmacount: identical to 'head' except for occasionally resetting to zero - * @alloc_order: log2 of the number of allocated descriptors - * @produce: number of descriptors to produce at submit time - * @ring: software ring buffer implementation of hardware ring - * @prep_lock: serializes descriptor preparation (producers) - */ -struct ioat2_dma_chan { - struct ioat_chan_common base; - size_t xfercap_log; - u16 head; - u16 issued; - u16 tail; - u16 dmacount; - u16 alloc_order; - u16 produce; - struct ioat_ring_ent **ring; - spinlock_t prep_lock; -}; - -static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c) -{ - struct ioat_chan_common *chan = to_chan_common(c); - - return container_of(chan, struct ioat2_dma_chan, base); -} - -static inline u32 ioat2_ring_size(struct ioat2_dma_chan *ioat) -{ - return 1 << ioat->alloc_order; -} - -/* count of descriptors in flight with the engine */ -static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat) -{ - return CIRC_CNT(ioat->head, ioat->tail, ioat2_ring_size(ioat)); -} - -/* count of descriptors pending submission to hardware */ -static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat) -{ - return CIRC_CNT(ioat->head, ioat->issued, ioat2_ring_size(ioat)); -} - -static inline u32 ioat2_ring_space(struct ioat2_dma_chan *ioat) -{ - return ioat2_ring_size(ioat) - ioat2_ring_active(ioat); -} - -static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len) -{ - u16 num_descs = len >> ioat->xfercap_log; - - num_descs += !!(len & ((1 << ioat->xfercap_log) - 1)); - return num_descs; -} - -/** - * struct ioat_ring_ent - wrapper around hardware descriptor - * @hw: hardware DMA descriptor (for memcpy) - * @fill: hardware fill descriptor - * @xor: hardware xor descriptor - * @xor_ex: hardware xor extension descriptor - * @pq: hardware pq descriptor - * @pq_ex: hardware pq extension descriptor - * @pqu: hardware pq update descriptor - * @raw: hardware raw (un-typed) descriptor - * @txd: the generic software descriptor for all engines - * @len: total transaction length for unmap - * @result: asynchronous result of validate operations - * @id: identifier for debug - */ - -struct ioat_ring_ent { - union { - struct ioat_dma_descriptor *hw; - struct ioat_xor_descriptor *xor; - struct ioat_xor_ext_descriptor *xor_ex; - struct ioat_pq_descriptor *pq; - struct ioat_pq_ext_descriptor *pq_ex; - struct ioat_pq_update_descriptor *pqu; - struct ioat_raw_descriptor *raw; - }; - size_t len; - struct dma_async_tx_descriptor txd; - enum sum_check_flags *result; - #ifdef DEBUG - int id; - #endif - struct ioat_sed_ent *sed; -}; - -static inline struct ioat_ring_ent * -ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx) -{ - return ioat->ring[idx & (ioat2_ring_size(ioat) - 1)]; -} - -static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr) -{ - struct ioat_chan_common *chan = &ioat->base; - - writel(addr & 0x00000000FFFFFFFF, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(addr >> 32, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); -} - -int ioat2_dma_probe(struct ioatdma_device *dev, int dca); -int ioat3_dma_probe(struct ioatdma_device *dev, int dca); -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); -int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs); -int ioat2_enumerate_channels(struct ioatdma_device *device); -struct dma_async_tx_descriptor * -ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags); -void ioat2_issue_pending(struct dma_chan *chan); -int ioat2_alloc_chan_resources(struct dma_chan *c); -void ioat2_free_chan_resources(struct dma_chan *c); -void __ioat2_restart_chan(struct ioat2_dma_chan *ioat); -bool reshape_ring(struct ioat2_dma_chan *ioat, int order); -void __ioat2_issue_pending(struct ioat2_dma_chan *ioat); -void ioat2_cleanup_event(unsigned long data); -void ioat2_timer_event(unsigned long data); -int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo); -int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo); -extern struct kobj_type ioat2_ktype; -extern struct kmem_cache *ioat2_cache; -#endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c deleted file mode 100644 index 64790a45ef5d..000000000000 --- a/drivers/dma/ioat/dma_v3.c +++ /dev/null @@ -1,1717 +0,0 @@ -/* - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * BSD LICENSE - * - * Copyright(c) 2004-2009 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Support routines for v3+ hardware - */ -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/gfp.h> -#include <linux/dmaengine.h> -#include <linux/dma-mapping.h> -#include <linux/prefetch.h> -#include "../dmaengine.h" -#include "registers.h" -#include "hw.h" -#include "dma.h" -#include "dma_v2.h" - -extern struct kmem_cache *ioat3_sed_cache; - -/* ioat hardware assumes at least two sources for raid operations */ -#define src_cnt_to_sw(x) ((x) + 2) -#define src_cnt_to_hw(x) ((x) - 2) -#define ndest_to_sw(x) ((x) + 1) -#define ndest_to_hw(x) ((x) - 1) -#define src16_cnt_to_sw(x) ((x) + 9) -#define src16_cnt_to_hw(x) ((x) - 9) - -/* provide a lookup table for setting the source address in the base or - * extended descriptor of an xor or pq descriptor - */ -static const u8 xor_idx_to_desc = 0xe0; -static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; -static const u8 pq_idx_to_desc = 0xf8; -static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2 }; -static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; -static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7, - 0, 1, 2, 3, 4, 5, 6 }; - -static void ioat3_eh(struct ioat2_dma_chan *ioat); - -static void xor_set_src(struct ioat_raw_descriptor *descs[2], - dma_addr_t addr, u32 offset, int idx) -{ - struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; - - raw->field[xor_idx_to_field[idx]] = addr + offset; -} - -static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) -{ - struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; - - return raw->field[pq_idx_to_field[idx]]; -} - -static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) -{ - struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; - - return raw->field[pq16_idx_to_field[idx]]; -} - -static void pq_set_src(struct ioat_raw_descriptor *descs[2], - dma_addr_t addr, u32 offset, u8 coef, int idx) -{ - struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; - struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; - - raw->field[pq_idx_to_field[idx]] = addr + offset; - pq->coef[idx] = coef; -} - -static bool is_jf_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_JSF0: - case PCI_DEVICE_ID_INTEL_IOAT_JSF1: - case PCI_DEVICE_ID_INTEL_IOAT_JSF2: - case PCI_DEVICE_ID_INTEL_IOAT_JSF3: - case PCI_DEVICE_ID_INTEL_IOAT_JSF4: - case PCI_DEVICE_ID_INTEL_IOAT_JSF5: - case PCI_DEVICE_ID_INTEL_IOAT_JSF6: - case PCI_DEVICE_ID_INTEL_IOAT_JSF7: - case PCI_DEVICE_ID_INTEL_IOAT_JSF8: - case PCI_DEVICE_ID_INTEL_IOAT_JSF9: - return true; - default: - return false; - } -} - -static bool is_snb_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_SNB0: - case PCI_DEVICE_ID_INTEL_IOAT_SNB1: - case PCI_DEVICE_ID_INTEL_IOAT_SNB2: - case PCI_DEVICE_ID_INTEL_IOAT_SNB3: - case PCI_DEVICE_ID_INTEL_IOAT_SNB4: - case PCI_DEVICE_ID_INTEL_IOAT_SNB5: - case PCI_DEVICE_ID_INTEL_IOAT_SNB6: - case PCI_DEVICE_ID_INTEL_IOAT_SNB7: - case PCI_DEVICE_ID_INTEL_IOAT_SNB8: - case PCI_DEVICE_ID_INTEL_IOAT_SNB9: - return true; - default: - return false; - } -} - -static bool is_ivb_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_IVB0: - case PCI_DEVICE_ID_INTEL_IOAT_IVB1: - case PCI_DEVICE_ID_INTEL_IOAT_IVB2: - case PCI_DEVICE_ID_INTEL_IOAT_IVB3: - case PCI_DEVICE_ID_INTEL_IOAT_IVB4: - case PCI_DEVICE_ID_INTEL_IOAT_IVB5: - case PCI_DEVICE_ID_INTEL_IOAT_IVB6: - case PCI_DEVICE_ID_INTEL_IOAT_IVB7: - case PCI_DEVICE_ID_INTEL_IOAT_IVB8: - case PCI_DEVICE_ID_INTEL_IOAT_IVB9: - return true; - default: - return false; - } - -} - -static bool is_hsw_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_HSW0: - case PCI_DEVICE_ID_INTEL_IOAT_HSW1: - case PCI_DEVICE_ID_INTEL_IOAT_HSW2: - case PCI_DEVICE_ID_INTEL_IOAT_HSW3: - case PCI_DEVICE_ID_INTEL_IOAT_HSW4: - case PCI_DEVICE_ID_INTEL_IOAT_HSW5: - case PCI_DEVICE_ID_INTEL_IOAT_HSW6: - case PCI_DEVICE_ID_INTEL_IOAT_HSW7: - case PCI_DEVICE_ID_INTEL_IOAT_HSW8: - case PCI_DEVICE_ID_INTEL_IOAT_HSW9: - return true; - default: - return false; - } - -} - -static bool is_xeon_cb32(struct pci_dev *pdev) -{ - return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || - is_hsw_ioat(pdev); -} - -static bool is_bwd_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_BWD0: - case PCI_DEVICE_ID_INTEL_IOAT_BWD1: - case PCI_DEVICE_ID_INTEL_IOAT_BWD2: - case PCI_DEVICE_ID_INTEL_IOAT_BWD3: - /* even though not Atom, BDX-DE has same DMA silicon */ - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: - return true; - default: - return false; - } -} - -static bool is_bwd_noraid(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_BWD2: - case PCI_DEVICE_ID_INTEL_IOAT_BWD3: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: - return true; - default: - return false; - } - -} - -static void pq16_set_src(struct ioat_raw_descriptor *desc[3], - dma_addr_t addr, u32 offset, u8 coef, unsigned idx) -{ - struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0]; - struct ioat_pq16a_descriptor *pq16 = - (struct ioat_pq16a_descriptor *)desc[1]; - struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; - - raw->field[pq16_idx_to_field[idx]] = addr + offset; - - if (idx < 8) - pq->coef[idx] = coef; - else - pq16->coef[idx - 8] = coef; -} - -static struct ioat_sed_ent * -ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool) -{ - struct ioat_sed_ent *sed; - gfp_t flags = __GFP_ZERO | GFP_ATOMIC; - - sed = kmem_cache_alloc(ioat3_sed_cache, flags); - if (!sed) - return NULL; - - sed->hw_pool = hw_pool; - sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool], - flags, &sed->dma); - if (!sed->hw) { - kmem_cache_free(ioat3_sed_cache, sed); - return NULL; - } - - return sed; -} - -static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed) -{ - if (!sed) - return; - - dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); - kmem_cache_free(ioat3_sed_cache, sed); -} - -static bool desc_has_ext(struct ioat_ring_ent *desc) -{ - struct ioat_dma_descriptor *hw = desc->hw; - - if (hw->ctl_f.op == IOAT_OP_XOR || - hw->ctl_f.op == IOAT_OP_XOR_VAL) { - struct ioat_xor_descriptor *xor = desc->xor; - - if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) - return true; - } else if (hw->ctl_f.op == IOAT_OP_PQ || - hw->ctl_f.op == IOAT_OP_PQ_VAL) { - struct ioat_pq_descriptor *pq = desc->pq; - - if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) - return true; - } - - return false; -} - -static u64 ioat3_get_current_completion(struct ioat_chan_common *chan) -{ - u64 phys_complete; - u64 completion; - - completion = *chan->completion; - phys_complete = ioat_chansts_to_addr(completion); - - dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, - (unsigned long long) phys_complete); - - return phys_complete; -} - -static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan, - u64 *phys_complete) -{ - *phys_complete = ioat3_get_current_completion(chan); - if (*phys_complete == chan->last_completion) - return false; - - clear_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - - return true; -} - -static void -desc_get_errstat(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc) -{ - struct ioat_dma_descriptor *hw = desc->hw; - - switch (hw->ctl_f.op) { - case IOAT_OP_PQ_VAL: - case IOAT_OP_PQ_VAL_16S: - { - struct ioat_pq_descriptor *pq = desc->pq; - - /* check if there's error written */ - if (!pq->dwbes_f.wbes) - return; - - /* need to set a chanerr var for checking to clear later */ - - if (pq->dwbes_f.p_val_err) - *desc->result |= SUM_CHECK_P_RESULT; - - if (pq->dwbes_f.q_val_err) - *desc->result |= SUM_CHECK_Q_RESULT; - - return; - } - default: - return; - } -} - -/** - * __cleanup - reclaim used descriptors - * @ioat: channel (ring) to clean - * - * The difference from the dma_v2.c __cleanup() is that this routine - * handles extended descriptors and dma-unmapping raid operations. - */ -static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) -{ - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *desc; - bool seen_current = false; - int idx = ioat->tail, i; - u16 active; - - dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued); - - /* - * At restart of the channel, the completion address and the - * channel status will be 0 due to starting a new chain. Since - * it's new chain and the first descriptor "fails", there is - * nothing to clean up. We do not want to reap the entire submitted - * chain due to this 0 address value and then BUG. - */ - if (!phys_complete) - return; - - active = ioat2_ring_active(ioat); - for (i = 0; i < active && !seen_current; i++) { - struct dma_async_tx_descriptor *tx; - - smp_read_barrier_depends(); - prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); - desc = ioat2_get_ring_ent(ioat, idx + i); - dump_desc_dbg(ioat, desc); - - /* set err stat if we are using dwbes */ - if (device->cap & IOAT_CAP_DWBES) - desc_get_errstat(ioat, desc); - - tx = &desc->txd; - if (tx->cookie) { - dma_cookie_complete(tx); - dma_descriptor_unmap(tx); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } - - if (tx->phys == phys_complete) - seen_current = true; - - /* skip extended descriptors */ - if (desc_has_ext(desc)) { - BUG_ON(i + 1 >= active); - i++; - } - - /* cleanup super extended descriptors */ - if (desc->sed) { - ioat3_free_sed(device, desc->sed); - desc->sed = NULL; - } - } - smp_mb(); /* finish all descriptor reads before incrementing tail */ - ioat->tail = idx + i; - BUG_ON(active && !seen_current); /* no active descs have written a completion? */ - chan->last_completion = phys_complete; - - if (active - i == 0) { - dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", - __func__); - clear_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } - /* 5 microsecond delay per pending descriptor */ - writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK), - chan->device->reg_base + IOAT_INTRDELAY_OFFSET); -} - -static void ioat3_cleanup(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - u64 phys_complete; - - spin_lock_bh(&chan->cleanup_lock); - - if (ioat3_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - if (is_ioat_halted(*chan->completion)) { - u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - - if (chanerr & IOAT_CHANERR_HANDLE_MASK) { - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - ioat3_eh(ioat); - } - } - - spin_unlock_bh(&chan->cleanup_lock); -} - -static void ioat3_cleanup_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - - ioat3_cleanup(ioat); - if (!test_bit(IOAT_RUN, &chan->state)) - return; - writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); -} - -static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - u64 phys_complete; - - ioat2_quiesce(chan, 0); - if (ioat3_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - __ioat2_restart_chan(ioat); -} - -static void ioat3_eh(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - struct pci_dev *pdev = to_pdev(chan); - struct ioat_dma_descriptor *hw; - struct dma_async_tx_descriptor *tx; - u64 phys_complete; - struct ioat_ring_ent *desc; - u32 err_handled = 0; - u32 chanerr_int; - u32 chanerr; - - /* cleanup so tail points to descriptor that caused the error */ - if (ioat3_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); - - dev_dbg(to_dev(chan), "%s: error = %x:%x\n", - __func__, chanerr, chanerr_int); - - desc = ioat2_get_ring_ent(ioat, ioat->tail); - hw = desc->hw; - dump_desc_dbg(ioat, desc); - - switch (hw->ctl_f.op) { - case IOAT_OP_XOR_VAL: - if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { - *desc->result |= SUM_CHECK_P_RESULT; - err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; - } - break; - case IOAT_OP_PQ_VAL: - case IOAT_OP_PQ_VAL_16S: - if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { - *desc->result |= SUM_CHECK_P_RESULT; - err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; - } - if (chanerr & IOAT_CHANERR_XOR_Q_ERR) { - *desc->result |= SUM_CHECK_Q_RESULT; - err_handled |= IOAT_CHANERR_XOR_Q_ERR; - } - break; - } - - /* fault on unhandled error or spurious halt */ - if (chanerr ^ err_handled || chanerr == 0) { - dev_err(to_dev(chan), "%s: fatal error (%x:%x)\n", - __func__, chanerr, err_handled); - BUG(); - } else { /* cleanup the faulty descriptor */ - tx = &desc->txd; - if (tx->cookie) { - dma_cookie_complete(tx); - dma_descriptor_unmap(tx); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } - } - - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int); - - /* mark faulting descriptor as complete */ - *chan->completion = desc->txd.phys; - - spin_lock_bh(&ioat->prep_lock); - ioat3_restart_channel(ioat); - spin_unlock_bh(&ioat->prep_lock); -} - -static void check_active(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - if (ioat2_ring_active(ioat)) { - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - return; - } - - if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &chan->state)) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - else if (ioat->alloc_order > ioat_get_alloc_order()) { - /* if the ring is idle, empty, and oversized try to step - * down the size - */ - reshape_ring(ioat, ioat->alloc_order - 1); - - /* keep shrinking until we get back to our minimum - * default size - */ - if (ioat->alloc_order > ioat_get_alloc_order()) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } - -} - -static void ioat3_timer_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - u64 status; - - status = ioat_chansts(chan); - - /* when halted due to errors check for channel - * programming errors before advancing the completion state - */ - if (is_ioat_halted(status)) { - u32 chanerr; - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - dev_err(to_dev(chan), "%s: Channel halted (%x)\n", - __func__, chanerr); - if (test_bit(IOAT_RUN, &chan->state)) - BUG_ON(is_ioat_bug(chanerr)); - else /* we never got off the ground */ - return; - } - - /* if we haven't made progress and we have already - * acknowledged a pending completion once, then be more - * forceful with a restart - */ - spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { - spin_lock_bh(&ioat->prep_lock); - ioat3_restart_channel(ioat); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - return; - } else { - set_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - } - - - if (ioat2_ring_active(ioat)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - else { - spin_lock_bh(&ioat->prep_lock); - check_active(ioat); - spin_unlock_bh(&ioat->prep_lock); - } - spin_unlock_bh(&chan->cleanup_lock); -} - -static enum dma_status -ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie, - struct dma_tx_state *txstate) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - enum dma_status ret; - - ret = dma_cookie_status(c, cookie, txstate); - if (ret == DMA_COMPLETE) - return ret; - - ioat3_cleanup(ioat); - - return dma_cookie_status(c, cookie, txstate); -} - -static struct dma_async_tx_descriptor * -__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, - dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, - size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_ring_ent *compl_desc; - struct ioat_ring_ent *desc; - struct ioat_ring_ent *ext; - size_t total_len = len; - struct ioat_xor_descriptor *xor; - struct ioat_xor_ext_descriptor *xor_ex = NULL; - struct ioat_dma_descriptor *hw; - int num_descs, with_ext, idx, i; - u32 offset = 0; - u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; - - BUG_ON(src_cnt < 2); - - num_descs = ioat2_xferlen_to_descs(ioat, len); - /* we need 2x the number of descriptors to cover greater than 5 - * sources - */ - if (src_cnt > 5) { - with_ext = 1; - num_descs *= 2; - } else - with_ext = 0; - - /* completion writes from the raid engine may pass completion - * writes from the legacy engine, so we need one extra null - * (legacy) descriptor to ensure all completion writes arrive in - * order. - */ - if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0) - idx = ioat->head; - else - return NULL; - i = 0; - do { - struct ioat_raw_descriptor *descs[2]; - size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); - int s; - - desc = ioat2_get_ring_ent(ioat, idx + i); - xor = desc->xor; - - /* save a branch by unconditionally retrieving the - * extended descriptor xor_set_src() knows to not write - * to it in the single descriptor case - */ - ext = ioat2_get_ring_ent(ioat, idx + i + 1); - xor_ex = ext->xor_ex; - - descs[0] = (struct ioat_raw_descriptor *) xor; - descs[1] = (struct ioat_raw_descriptor *) xor_ex; - for (s = 0; s < src_cnt; s++) - xor_set_src(descs, src[s], offset, s); - xor->size = xfer_size; - xor->dst_addr = dest + offset; - xor->ctl = 0; - xor->ctl_f.op = op; - xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); - - len -= xfer_size; - offset += xfer_size; - dump_desc_dbg(ioat, desc); - } while ((i += 1 + with_ext) < num_descs); - - /* last xor descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - - /* completion descriptor carries interrupt bit */ - compl_desc = ioat2_get_ring_ent(ioat, idx + i); - compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; - hw = compl_desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - dump_desc_dbg(ioat, compl_desc); - - /* we leave the channel locked to ensure in order submission */ - return &compl_desc->txd; -} - -static struct dma_async_tx_descriptor * -ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, - unsigned int src_cnt, size_t len, unsigned long flags) -{ - return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, - unsigned int src_cnt, size_t len, - enum sum_check_flags *result, unsigned long flags) -{ - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... so clear it here - */ - *result = 0; - - return __ioat3_prep_xor_lock(chan, result, src[0], &src[1], - src_cnt - 1, len, flags); -} - -static void -dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext) -{ - struct device *dev = to_dev(&ioat->base); - struct ioat_pq_descriptor *pq = desc->pq; - struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; - struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; - int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); - int i; - - dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" - " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" - " src_cnt: %d)\n", - desc_id(desc), (unsigned long long) desc->txd.phys, - (unsigned long long) (pq_ex ? pq_ex->next : pq->next), - desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, - pq->ctl_f.compl_write, - pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", - pq->ctl_f.src_cnt); - for (i = 0; i < src_cnt; i++) - dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, - (unsigned long long) pq_get_src(descs, i), pq->coef[i]); - dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); - dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); - dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); -} - -static void dump_pq16_desc_dbg(struct ioat2_dma_chan *ioat, - struct ioat_ring_ent *desc) -{ - struct device *dev = to_dev(&ioat->base); - struct ioat_pq_descriptor *pq = desc->pq; - struct ioat_raw_descriptor *descs[] = { (void *)pq, - (void *)pq, - (void *)pq }; - int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); - int i; - - if (desc->sed) { - descs[1] = (void *)desc->sed->hw; - descs[2] = (void *)desc->sed->hw + 64; - } - - dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" - " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" - " src_cnt: %d)\n", - desc_id(desc), (unsigned long long) desc->txd.phys, - (unsigned long long) pq->next, - desc->txd.flags, pq->size, pq->ctl, - pq->ctl_f.op, pq->ctl_f.int_en, - pq->ctl_f.compl_write, - pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", - pq->ctl_f.src_cnt); - for (i = 0; i < src_cnt; i++) { - dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, - (unsigned long long) pq16_get_src(descs, i), - pq->coef[i]); - } - dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); - dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); -} - -static struct dma_async_tx_descriptor * -__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, - const dma_addr_t *dst, const dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, - size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *compl_desc; - struct ioat_ring_ent *desc; - struct ioat_ring_ent *ext; - size_t total_len = len; - struct ioat_pq_descriptor *pq; - struct ioat_pq_ext_descriptor *pq_ex = NULL; - struct ioat_dma_descriptor *hw; - u32 offset = 0; - u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; - int i, s, idx, with_ext, num_descs; - int cb32 = (device->version < IOAT_VER_3_3) ? 1 : 0; - - dev_dbg(to_dev(chan), "%s\n", __func__); - /* the engine requires at least two sources (we provide - * at least 1 implied source in the DMA_PREP_CONTINUE case) - */ - BUG_ON(src_cnt + dmaf_continue(flags) < 2); - - num_descs = ioat2_xferlen_to_descs(ioat, len); - /* we need 2x the number of descriptors to cover greater than 3 - * sources (we need 1 extra source in the q-only continuation - * case and 3 extra sources in the p+q continuation case. - */ - if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || - (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { - with_ext = 1; - num_descs *= 2; - } else - with_ext = 0; - - /* completion writes from the raid engine may pass completion - * writes from the legacy engine, so we need one extra null - * (legacy) descriptor to ensure all completion writes arrive in - * order. - */ - if (likely(num_descs) && - ioat2_check_space_lock(ioat, num_descs + cb32) == 0) - idx = ioat->head; - else - return NULL; - i = 0; - do { - struct ioat_raw_descriptor *descs[2]; - size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); - - desc = ioat2_get_ring_ent(ioat, idx + i); - pq = desc->pq; - - /* save a branch by unconditionally retrieving the - * extended descriptor pq_set_src() knows to not write - * to it in the single descriptor case - */ - ext = ioat2_get_ring_ent(ioat, idx + i + with_ext); - pq_ex = ext->pq_ex; - - descs[0] = (struct ioat_raw_descriptor *) pq; - descs[1] = (struct ioat_raw_descriptor *) pq_ex; - - for (s = 0; s < src_cnt; s++) - pq_set_src(descs, src[s], offset, scf[s], s); - - /* see the comment for dma_maxpq in include/linux/dmaengine.h */ - if (dmaf_p_disabled_continue(flags)) - pq_set_src(descs, dst[1], offset, 1, s++); - else if (dmaf_continue(flags)) { - pq_set_src(descs, dst[0], offset, 0, s++); - pq_set_src(descs, dst[1], offset, 1, s++); - pq_set_src(descs, dst[1], offset, 0, s++); - } - pq->size = xfer_size; - pq->p_addr = dst[0] + offset; - pq->q_addr = dst[1] + offset; - pq->ctl = 0; - pq->ctl_f.op = op; - /* we turn on descriptor write back error status */ - if (device->cap & IOAT_CAP_DWBES) - pq->ctl_f.wb_en = result ? 1 : 0; - pq->ctl_f.src_cnt = src_cnt_to_hw(s); - pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); - pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); - - len -= xfer_size; - offset += xfer_size; - } while ((i += 1 + with_ext) < num_descs); - - /* last pq descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - dump_pq_desc_dbg(ioat, desc, ext); - - if (!cb32) { - pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - pq->ctl_f.compl_write = 1; - compl_desc = desc; - } else { - /* completion descriptor carries interrupt bit */ - compl_desc = ioat2_get_ring_ent(ioat, idx + i); - compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; - hw = compl_desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - dump_desc_dbg(ioat, compl_desc); - } - - - /* we leave the channel locked to ensure in order submission */ - return &compl_desc->txd; -} - -static struct dma_async_tx_descriptor * -__ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, - const dma_addr_t *dst, const dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, - size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *desc; - size_t total_len = len; - struct ioat_pq_descriptor *pq; - u32 offset = 0; - u8 op; - int i, s, idx, num_descs; - - /* this function is only called with 9-16 sources */ - op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; - - dev_dbg(to_dev(chan), "%s\n", __func__); - - num_descs = ioat2_xferlen_to_descs(ioat, len); - - /* - * 16 source pq is only available on cb3.3 and has no completion - * write hw bug. - */ - if (num_descs && ioat2_check_space_lock(ioat, num_descs) == 0) - idx = ioat->head; - else - return NULL; - - i = 0; - - do { - struct ioat_raw_descriptor *descs[4]; - size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); - - desc = ioat2_get_ring_ent(ioat, idx + i); - pq = desc->pq; - - descs[0] = (struct ioat_raw_descriptor *) pq; - - desc->sed = ioat3_alloc_sed(device, (src_cnt-2) >> 3); - if (!desc->sed) { - dev_err(to_dev(chan), - "%s: no free sed entries\n", __func__); - return NULL; - } - - pq->sed_addr = desc->sed->dma; - desc->sed->parent = desc; - - descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; - descs[2] = (void *)descs[1] + 64; - - for (s = 0; s < src_cnt; s++) - pq16_set_src(descs, src[s], offset, scf[s], s); - - /* see the comment for dma_maxpq in include/linux/dmaengine.h */ - if (dmaf_p_disabled_continue(flags)) - pq16_set_src(descs, dst[1], offset, 1, s++); - else if (dmaf_continue(flags)) { - pq16_set_src(descs, dst[0], offset, 0, s++); - pq16_set_src(descs, dst[1], offset, 1, s++); - pq16_set_src(descs, dst[1], offset, 0, s++); - } - - pq->size = xfer_size; - pq->p_addr = dst[0] + offset; - pq->q_addr = dst[1] + offset; - pq->ctl = 0; - pq->ctl_f.op = op; - pq->ctl_f.src_cnt = src16_cnt_to_hw(s); - /* we turn on descriptor write back error status */ - if (device->cap & IOAT_CAP_DWBES) - pq->ctl_f.wb_en = result ? 1 : 0; - pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); - pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); - - len -= xfer_size; - offset += xfer_size; - } while (++i < num_descs); - - /* last pq descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - - /* with cb3.3 we should be able to do completion w/o a null desc */ - pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - pq->ctl_f.compl_write = 1; - - dump_pq16_desc_dbg(ioat, desc); - - /* we leave the channel locked to ensure in order submission */ - return &desc->txd; -} - -static int src_cnt_flags(unsigned int src_cnt, unsigned long flags) -{ - if (dmaf_p_disabled_continue(flags)) - return src_cnt + 1; - else if (dmaf_continue(flags)) - return src_cnt + 3; - else - return src_cnt; -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, size_t len, - unsigned long flags) -{ - /* specify valid address for disabled result */ - if (flags & DMA_PREP_PQ_DISABLE_P) - dst[0] = dst[1]; - if (flags & DMA_PREP_PQ_DISABLE_Q) - dst[1] = dst[0]; - - /* handle the single source multiply case from the raid6 - * recovery path - */ - if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) { - dma_addr_t single_source[2]; - unsigned char single_source_coef[2]; - - BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); - single_source[0] = src[0]; - single_source[1] = src[0]; - single_source_coef[0] = scf[0]; - single_source_coef[1] = 0; - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, NULL, dst, single_source, - 2, single_source_coef, len, - flags) : - __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, - single_source_coef, len, flags); - - } else { - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt, - scf, len, flags) : - __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, - scf, len, flags); - } -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, size_t len, - enum sum_check_flags *pqres, unsigned long flags) -{ - /* specify valid address for disabled result */ - if (flags & DMA_PREP_PQ_DISABLE_P) - pq[0] = pq[1]; - if (flags & DMA_PREP_PQ_DISABLE_Q) - pq[1] = pq[0]; - - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... so clear it here - */ - *pqres = 0; - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, - flags) : - __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, - flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, - unsigned int src_cnt, size_t len, unsigned long flags) -{ - unsigned char scf[src_cnt]; - dma_addr_t pq[2]; - - memset(scf, 0, src_cnt); - pq[0] = dst; - flags |= DMA_PREP_PQ_DISABLE_Q; - pq[1] = dst; /* specify valid address for disabled result */ - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, - flags) : - __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, - flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, - unsigned int src_cnt, size_t len, - enum sum_check_flags *result, unsigned long flags) -{ - unsigned char scf[src_cnt]; - dma_addr_t pq[2]; - - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... so clear it here - */ - *result = 0; - - memset(scf, 0, src_cnt); - pq[0] = src[0]; - flags |= DMA_PREP_PQ_DISABLE_Q; - pq[1] = pq[0]; /* specify valid address for disabled result */ - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, - scf, len, flags) : - __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, - scf, len, flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_ring_ent *desc; - struct ioat_dma_descriptor *hw; - - if (ioat2_check_space_lock(ioat, 1) == 0) - desc = ioat2_get_ring_ent(ioat, ioat->head); - else - return NULL; - - hw = desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = 1; - hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - hw->src_addr = 0; - hw->dst_addr = 0; - - desc->txd.flags = flags; - desc->len = 1; - - dump_desc_dbg(ioat, desc); - - /* we leave the channel locked to ensure in order submission */ - return &desc->txd; -} - -static void ioat3_dma_test_callback(void *dma_async_param) -{ - struct completion *cmp = dma_async_param; - - complete(cmp); -} - -#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ -static int ioat_xor_val_self_test(struct ioatdma_device *device) -{ - int i, src_idx; - struct page *dest; - struct page *xor_srcs[IOAT_NUM_SRC_TEST]; - struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; - dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; - dma_addr_t dest_dma; - struct dma_async_tx_descriptor *tx; - struct dma_chan *dma_chan; - dma_cookie_t cookie; - u8 cmp_byte = 0; - u32 cmp_word; - u32 xor_val_result; - int err = 0; - struct completion cmp; - unsigned long tmo; - struct device *dev = &device->pdev->dev; - struct dma_device *dma = &device->common; - u8 op = 0; - - dev_dbg(dev, "%s\n", __func__); - - if (!dma_has_cap(DMA_XOR, dma->cap_mask)) - return 0; - - for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { - xor_srcs[src_idx] = alloc_page(GFP_KERNEL); - if (!xor_srcs[src_idx]) { - while (src_idx--) - __free_page(xor_srcs[src_idx]); - return -ENOMEM; - } - } - - dest = alloc_page(GFP_KERNEL); - if (!dest) { - while (src_idx--) - __free_page(xor_srcs[src_idx]); - return -ENOMEM; - } - - /* Fill in src buffers */ - for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { - u8 *ptr = page_address(xor_srcs[src_idx]); - for (i = 0; i < PAGE_SIZE; i++) - ptr[i] = (1 << src_idx); - } - - for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) - cmp_byte ^= (u8) (1 << src_idx); - - cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | - (cmp_byte << 8) | cmp_byte; - - memset(page_address(dest), 0, PAGE_SIZE); - - dma_chan = container_of(dma->channels.next, struct dma_chan, - device_node); - if (dma->device_alloc_chan_resources(dma_chan) < 1) { - err = -ENODEV; - goto out; - } - - /* test xor */ - op = IOAT_OP_XOR; - - dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(dev, dest_dma)) - goto dma_unmap; - - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - dma_srcs[i] = DMA_ERROR_CODE; - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) { - dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_srcs[i])) - goto dma_unmap; - } - tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs, - IOAT_NUM_SRC_TEST, PAGE_SIZE, - DMA_PREP_INTERRUPT); - - if (!tx) { - dev_err(dev, "Self-test xor prep failed\n"); - err = -ENODEV; - goto dma_unmap; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat3_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test xor setup failed\n"); - err = -ENODEV; - goto dma_unmap; - } - dma->device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dev, "Self-test xor timed out\n"); - err = -ENODEV; - goto dma_unmap; - } - - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); - - dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); - for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { - u32 *ptr = page_address(dest); - if (ptr[i] != cmp_word) { - dev_err(dev, "Self-test xor failed compare\n"); - err = -ENODEV; - goto free_resources; - } - } - dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); - - dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); - - /* skip validate if the capability is not present */ - if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) - goto free_resources; - - op = IOAT_OP_XOR_VAL; - - /* validate the sources with the destintation page */ - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - xor_val_srcs[i] = xor_srcs[i]; - xor_val_srcs[i] = dest; - - xor_val_result = 1; - - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_srcs[i] = DMA_ERROR_CODE; - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { - dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_srcs[i])) - goto dma_unmap; - } - tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, - IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, - &xor_val_result, DMA_PREP_INTERRUPT); - if (!tx) { - dev_err(dev, "Self-test zero prep failed\n"); - err = -ENODEV; - goto dma_unmap; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat3_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test zero setup failed\n"); - err = -ENODEV; - goto dma_unmap; - } - dma->device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dev, "Self-test validate timed out\n"); - err = -ENODEV; - goto dma_unmap; - } - - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); - - if (xor_val_result != 0) { - dev_err(dev, "Self-test validate failed compare\n"); - err = -ENODEV; - goto free_resources; - } - - memset(page_address(dest), 0, PAGE_SIZE); - - /* test for non-zero parity sum */ - op = IOAT_OP_XOR_VAL; - - xor_val_result = 0; - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_srcs[i] = DMA_ERROR_CODE; - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { - dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_srcs[i])) - goto dma_unmap; - } - tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, - IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, - &xor_val_result, DMA_PREP_INTERRUPT); - if (!tx) { - dev_err(dev, "Self-test 2nd zero prep failed\n"); - err = -ENODEV; - goto dma_unmap; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat3_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test 2nd zero setup failed\n"); - err = -ENODEV; - goto dma_unmap; - } - dma->device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dev, "Self-test 2nd validate timed out\n"); - err = -ENODEV; - goto dma_unmap; - } - - if (xor_val_result != SUM_CHECK_P_RESULT) { - dev_err(dev, "Self-test validate failed compare\n"); - err = -ENODEV; - goto dma_unmap; - } - - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); - - goto free_resources; -dma_unmap: - if (op == IOAT_OP_XOR) { - if (dest_dma != DMA_ERROR_CODE) - dma_unmap_page(dev, dest_dma, PAGE_SIZE, - DMA_FROM_DEVICE); - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - if (dma_srcs[i] != DMA_ERROR_CODE) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, - DMA_TO_DEVICE); - } else if (op == IOAT_OP_XOR_VAL) { - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - if (dma_srcs[i] != DMA_ERROR_CODE) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, - DMA_TO_DEVICE); - } -free_resources: - dma->device_free_chan_resources(dma_chan); -out: - src_idx = IOAT_NUM_SRC_TEST; - while (src_idx--) - __free_page(xor_srcs[src_idx]); - __free_page(dest); - return err; -} - -static int ioat3_dma_self_test(struct ioatdma_device *device) -{ - int rc = ioat_dma_self_test(device); - - if (rc) - return rc; - - rc = ioat_xor_val_self_test(device); - if (rc) - return rc; - - return 0; -} - -static int ioat3_irq_reinit(struct ioatdma_device *device) -{ - struct pci_dev *pdev = device->pdev; - int irq = pdev->irq, i; - - if (!is_bwd_ioat(pdev)) - return 0; - - switch (device->irq_mode) { - case IOAT_MSIX: - for (i = 0; i < device->common.chancnt; i++) { - struct msix_entry *msix = &device->msix_entries[i]; - struct ioat_chan_common *chan; - - chan = ioat_chan_by_index(device, i); - devm_free_irq(&pdev->dev, msix->vector, chan); - } - - pci_disable_msix(pdev); - break; - case IOAT_MSI: - pci_disable_msi(pdev); - /* fall through */ - case IOAT_INTX: - devm_free_irq(&pdev->dev, irq, device); - break; - default: - return 0; - } - device->irq_mode = IOAT_NOIRQ; - - return ioat_dma_setup_interrupts(device); -} - -static int ioat3_reset_hw(struct ioat_chan_common *chan) -{ - /* throw away whatever the channel was doing and get it - * initialized, with ioat3 specific workarounds - */ - struct ioatdma_device *device = chan->device; - struct pci_dev *pdev = device->pdev; - u32 chanerr; - u16 dev_id; - int err; - - ioat2_quiesce(chan, msecs_to_jiffies(100)); - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - - if (device->version < IOAT_VER_3_3) { - /* clear any pending errors */ - err = pci_read_config_dword(pdev, - IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); - if (err) { - dev_err(&pdev->dev, - "channel error register unreachable\n"); - return err; - } - pci_write_config_dword(pdev, - IOAT_PCI_CHANERR_INT_OFFSET, chanerr); - - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { - pci_write_config_dword(pdev, - IOAT_PCI_DMAUNCERRSTS_OFFSET, - 0x10); - } - } - - err = ioat2_reset_sync(chan, msecs_to_jiffies(200)); - if (!err) - err = ioat3_irq_reinit(device); - - if (err) - dev_err(&pdev->dev, "Failed to reset: %d\n", err); - - return err; -} - -static void ioat3_intr_quirk(struct ioatdma_device *device) -{ - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - u32 errmask; - - dma = &device->common; - - /* - * if we have descriptor write back error status, we mask the - * error interrupts - */ - if (device->cap & IOAT_CAP_DWBES) { - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - errmask = readl(chan->reg_base + - IOAT_CHANERR_MASK_OFFSET); - errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR | - IOAT_CHANERR_XOR_Q_ERR; - writel(errmask, chan->reg_base + - IOAT_CHANERR_MASK_OFFSET); - } - } -} - -int ioat3_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - int dca_en = system_has_dca_enabled(pdev); - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - bool is_raid_device = false; - int err; - - device->enumerate_channels = ioat2_enumerate_channels; - device->reset_hw = ioat3_reset_hw; - device->self_test = ioat3_dma_self_test; - device->intr_quirk = ioat3_intr_quirk; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; - dma->device_issue_pending = ioat2_issue_pending; - dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; - dma->device_free_chan_resources = ioat2_free_chan_resources; - - dma_cap_set(DMA_INTERRUPT, dma->cap_mask); - dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock; - - device->cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); - - if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev)) - device->cap &= ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS); - - /* dca is incompatible with raid operations */ - if (dca_en && (device->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) - device->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); - - if (device->cap & IOAT_CAP_XOR) { - is_raid_device = true; - dma->max_xor = 8; - - dma_cap_set(DMA_XOR, dma->cap_mask); - dma->device_prep_dma_xor = ioat3_prep_xor; - - dma_cap_set(DMA_XOR_VAL, dma->cap_mask); - dma->device_prep_dma_xor_val = ioat3_prep_xor_val; - } - - if (device->cap & IOAT_CAP_PQ) { - is_raid_device = true; - - dma->device_prep_dma_pq = ioat3_prep_pq; - dma->device_prep_dma_pq_val = ioat3_prep_pq_val; - dma_cap_set(DMA_PQ, dma->cap_mask); - dma_cap_set(DMA_PQ_VAL, dma->cap_mask); - - if (device->cap & IOAT_CAP_RAID16SS) { - dma_set_maxpq(dma, 16, 0); - } else { - dma_set_maxpq(dma, 8, 0); - } - - if (!(device->cap & IOAT_CAP_XOR)) { - dma->device_prep_dma_xor = ioat3_prep_pqxor; - dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; - dma_cap_set(DMA_XOR, dma->cap_mask); - dma_cap_set(DMA_XOR_VAL, dma->cap_mask); - - if (device->cap & IOAT_CAP_RAID16SS) { - dma->max_xor = 16; - } else { - dma->max_xor = 8; - } - } - } - - dma->device_tx_status = ioat3_tx_status; - device->cleanup_fn = ioat3_cleanup_event; - device->timer_fn = ioat3_timer_event; - - /* starting with CB3.3 super extended descriptors are supported */ - if (device->cap & IOAT_CAP_RAID16SS) { - char pool_name[14]; - int i; - - for (i = 0; i < MAX_SED_POOLS; i++) { - snprintf(pool_name, 14, "ioat_hw%d_sed", i); - - /* allocate SED DMA pool */ - device->sed_hw_pool[i] = dmam_pool_create(pool_name, - &pdev->dev, - SED_SIZE * (i + 1), 64, 0); - if (!device->sed_hw_pool[i]) - return -ENOMEM; - - } - } - - err = ioat_probe(device); - if (err) - return err; - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } - - err = ioat_register(device); - if (err) - return err; - - ioat_kobject_add(device, &ioat2_ktype); - - if (dca) - device->dca = ioat3_dca_init(pdev, device->reg_base); - - return 0; -} diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index a3e731edce57..690e3b4f8202 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -21,11 +21,6 @@ #define IOAT_MMIO_BAR 0 /* CB device ID's */ -#define IOAT_PCI_DID_5000 0x1A38 -#define IOAT_PCI_DID_CNB 0x360B -#define IOAT_PCI_DID_SCNB 0x65FF -#define IOAT_PCI_DID_SNB 0x402F - #define PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20 #define PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21 #define PCI_DEVICE_ID_INTEL_IOAT_IVB2 0x0e22 @@ -58,6 +53,17 @@ #define PCI_DEVICE_ID_INTEL_IOAT_BDXDE2 0x6f52 #define PCI_DEVICE_ID_INTEL_IOAT_BDXDE3 0x6f53 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX0 0x6f20 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX1 0x6f21 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX2 0x6f22 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX3 0x6f23 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX4 0x6f24 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX5 0x6f25 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX6 0x6f26 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX7 0x6f27 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX8 0x6f2e +#define PCI_DEVICE_ID_INTEL_IOAT_BDX9 0x6f2f + #define IOAT_VER_1_2 0x12 /* Version 1.2 */ #define IOAT_VER_2_0 0x20 /* Version 2.0 */ #define IOAT_VER_3_0 0x30 /* Version 3.0 */ diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c new file mode 100644 index 000000000000..1c3c9b0abf4e --- /dev/null +++ b/drivers/dma/ioat/init.c @@ -0,0 +1,1314 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/workqueue.h> +#include <linux/prefetch.h> +#include <linux/dca.h> +#include "dma.h" +#include "registers.h" +#include "hw.h" + +#include "../dmaengine.h" + +MODULE_VERSION(IOAT_DMA_VERSION); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); + +static struct pci_device_id ioat_pci_tbl[] = { + /* I/OAT v3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + + /* I/OAT v3.2 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX9) }, + + /* I/OAT v3.3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE3) }, + + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); + +static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id); +static void ioat_remove(struct pci_dev *pdev); +static void +ioat_init_channel(struct ioatdma_device *ioat_dma, + struct ioatdma_chan *ioat_chan, int idx); +static void ioat_intr_quirk(struct ioatdma_device *ioat_dma); +static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma); +static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma); + +static int ioat_dca_enabled = 1; +module_param(ioat_dca_enabled, int, 0644); +MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); +int ioat_pending_level = 4; +module_param(ioat_pending_level, int, 0644); +MODULE_PARM_DESC(ioat_pending_level, + "high-water mark for pushing ioat descriptors (default: 4)"); +int ioat_ring_alloc_order = 8; +module_param(ioat_ring_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_alloc_order, + "ioat+: allocate 2^n descriptors per channel (default: 8 max: 16)"); +int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; +module_param(ioat_ring_max_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_max_alloc_order, + "ioat+: upper limit for ring size (default: 16)"); +static char ioat_interrupt_style[32] = "msix"; +module_param_string(ioat_interrupt_style, ioat_interrupt_style, + sizeof(ioat_interrupt_style), 0644); +MODULE_PARM_DESC(ioat_interrupt_style, + "set ioat interrupt style: msix (default), msi, intx"); + +struct kmem_cache *ioat_cache; +struct kmem_cache *ioat_sed_cache; + +static bool is_jf_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_JSF0: + case PCI_DEVICE_ID_INTEL_IOAT_JSF1: + case PCI_DEVICE_ID_INTEL_IOAT_JSF2: + case PCI_DEVICE_ID_INTEL_IOAT_JSF3: + case PCI_DEVICE_ID_INTEL_IOAT_JSF4: + case PCI_DEVICE_ID_INTEL_IOAT_JSF5: + case PCI_DEVICE_ID_INTEL_IOAT_JSF6: + case PCI_DEVICE_ID_INTEL_IOAT_JSF7: + case PCI_DEVICE_ID_INTEL_IOAT_JSF8: + case PCI_DEVICE_ID_INTEL_IOAT_JSF9: + return true; + default: + return false; + } +} + +static bool is_snb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_SNB0: + case PCI_DEVICE_ID_INTEL_IOAT_SNB1: + case PCI_DEVICE_ID_INTEL_IOAT_SNB2: + case PCI_DEVICE_ID_INTEL_IOAT_SNB3: + case PCI_DEVICE_ID_INTEL_IOAT_SNB4: + case PCI_DEVICE_ID_INTEL_IOAT_SNB5: + case PCI_DEVICE_ID_INTEL_IOAT_SNB6: + case PCI_DEVICE_ID_INTEL_IOAT_SNB7: + case PCI_DEVICE_ID_INTEL_IOAT_SNB8: + case PCI_DEVICE_ID_INTEL_IOAT_SNB9: + return true; + default: + return false; + } +} + +static bool is_ivb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_IVB0: + case PCI_DEVICE_ID_INTEL_IOAT_IVB1: + case PCI_DEVICE_ID_INTEL_IOAT_IVB2: + case PCI_DEVICE_ID_INTEL_IOAT_IVB3: + case PCI_DEVICE_ID_INTEL_IOAT_IVB4: + case PCI_DEVICE_ID_INTEL_IOAT_IVB5: + case PCI_DEVICE_ID_INTEL_IOAT_IVB6: + case PCI_DEVICE_ID_INTEL_IOAT_IVB7: + case PCI_DEVICE_ID_INTEL_IOAT_IVB8: + case PCI_DEVICE_ID_INTEL_IOAT_IVB9: + return true; + default: + return false; + } + +} + +static bool is_hsw_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_HSW0: + case PCI_DEVICE_ID_INTEL_IOAT_HSW1: + case PCI_DEVICE_ID_INTEL_IOAT_HSW2: + case PCI_DEVICE_ID_INTEL_IOAT_HSW3: + case PCI_DEVICE_ID_INTEL_IOAT_HSW4: + case PCI_DEVICE_ID_INTEL_IOAT_HSW5: + case PCI_DEVICE_ID_INTEL_IOAT_HSW6: + case PCI_DEVICE_ID_INTEL_IOAT_HSW7: + case PCI_DEVICE_ID_INTEL_IOAT_HSW8: + case PCI_DEVICE_ID_INTEL_IOAT_HSW9: + return true; + default: + return false; + } + +} + +static bool is_bdx_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BDX0: + case PCI_DEVICE_ID_INTEL_IOAT_BDX1: + case PCI_DEVICE_ID_INTEL_IOAT_BDX2: + case PCI_DEVICE_ID_INTEL_IOAT_BDX3: + case PCI_DEVICE_ID_INTEL_IOAT_BDX4: + case PCI_DEVICE_ID_INTEL_IOAT_BDX5: + case PCI_DEVICE_ID_INTEL_IOAT_BDX6: + case PCI_DEVICE_ID_INTEL_IOAT_BDX7: + case PCI_DEVICE_ID_INTEL_IOAT_BDX8: + case PCI_DEVICE_ID_INTEL_IOAT_BDX9: + return true; + default: + return false; + } +} + +static bool is_xeon_cb32(struct pci_dev *pdev) +{ + return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || + is_hsw_ioat(pdev) || is_bdx_ioat(pdev); +} + +bool is_bwd_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BWD0: + case PCI_DEVICE_ID_INTEL_IOAT_BWD1: + case PCI_DEVICE_ID_INTEL_IOAT_BWD2: + case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + /* even though not Atom, BDX-DE has same DMA silicon */ + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: + return true; + default: + return false; + } +} + +static bool is_bwd_noraid(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BWD2: + case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: + return true; + default: + return false; + } + +} + +/* + * Perform a IOAT transaction to verify the HW works. + */ +#define IOAT_TEST_SIZE 2000 + +static void ioat_dma_test_callback(void *dma_async_param) +{ + struct completion *cmp = dma_async_param; + + complete(cmp); +} + +/** + * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. + * @ioat_dma: dma device to be tested + */ +static int ioat_dma_self_test(struct ioatdma_device *ioat_dma) +{ + int i; + u8 *src; + u8 *dest; + struct dma_device *dma = &ioat_dma->dma_dev; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_chan *dma_chan; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + int err = 0; + struct completion cmp; + unsigned long tmo; + unsigned long flags; + + src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < IOAT_TEST_SIZE; i++) + src[i] = (u8)i; + + /* Start copy, using first DMA channel */ + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + dev_err(dev, "selftest cannot allocate chan resource\n"); + err = -ENODEV; + goto out; + } + + dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_src)) { + dev_err(dev, "mapping src buffer failed\n"); + goto free_resources; + } + dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dma_dest)) { + dev_err(dev, "mapping dest buffer failed\n"); + goto unmap_src; + } + flags = DMA_PREP_INTERRUPT; + tx = ioat_dma->dma_dev.device_prep_dma_memcpy(dma_chan, dma_dest, + dma_src, IOAT_TEST_SIZE, + flags); + if (!tx) { + dev_err(dev, "Self-test prep failed, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test setup failed, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) + != DMA_COMPLETE) { + dev_err(dev, "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + if (memcmp(src, dest, IOAT_TEST_SIZE)) { + dev_err(dev, "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +unmap_dma: + dma_unmap_single(dev, dma_dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); +unmap_src: + dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE); +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +/** + * ioat_dma_setup_interrupts - setup interrupt handler + * @ioat_dma: ioat dma device + */ +int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma) +{ + struct ioatdma_chan *ioat_chan; + struct pci_dev *pdev = ioat_dma->pdev; + struct device *dev = &pdev->dev; + struct msix_entry *msix; + int i, j, msixcnt; + int err = -EINVAL; + u8 intrctrl = 0; + + if (!strcmp(ioat_interrupt_style, "msix")) + goto msix; + if (!strcmp(ioat_interrupt_style, "msi")) + goto msi; + if (!strcmp(ioat_interrupt_style, "intx")) + goto intx; + dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); + goto err_no_irq; + +msix: + /* The number of MSI-X vectors should equal the number of channels */ + msixcnt = ioat_dma->dma_dev.chancnt; + for (i = 0; i < msixcnt; i++) + ioat_dma->msix_entries[i].entry = i; + + err = pci_enable_msix_exact(pdev, ioat_dma->msix_entries, msixcnt); + if (err) + goto msi; + + for (i = 0; i < msixcnt; i++) { + msix = &ioat_dma->msix_entries[i]; + ioat_chan = ioat_chan_by_index(ioat_dma, i); + err = devm_request_irq(dev, msix->vector, + ioat_dma_do_interrupt_msix, 0, + "ioat-msix", ioat_chan); + if (err) { + for (j = 0; j < i; j++) { + msix = &ioat_dma->msix_entries[j]; + ioat_chan = ioat_chan_by_index(ioat_dma, j); + devm_free_irq(dev, msix->vector, ioat_chan); + } + goto msi; + } + } + intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; + ioat_dma->irq_mode = IOAT_MSIX; + goto done; + +msi: + err = pci_enable_msi(pdev); + if (err) + goto intx; + + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, + "ioat-msi", ioat_dma); + if (err) { + pci_disable_msi(pdev); + goto intx; + } + ioat_dma->irq_mode = IOAT_MSI; + goto done; + +intx: + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, + IRQF_SHARED, "ioat-intx", ioat_dma); + if (err) + goto err_no_irq; + + ioat_dma->irq_mode = IOAT_INTX; +done: + if (is_bwd_ioat(pdev)) + ioat_intr_quirk(ioat_dma); + intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; + writeb(intrctrl, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); + return 0; + +err_no_irq: + /* Disable all interrupt generation */ + writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); + ioat_dma->irq_mode = IOAT_NOIRQ; + dev_err(dev, "no usable interrupts\n"); + return err; +} + +static void ioat_disable_interrupts(struct ioatdma_device *ioat_dma) +{ + /* Disable all interrupt generation */ + writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); +} + +static int ioat_probe(struct ioatdma_device *ioat_dma) +{ + int err = -ENODEV; + struct dma_device *dma = &ioat_dma->dma_dev; + struct pci_dev *pdev = ioat_dma->pdev; + struct device *dev = &pdev->dev; + + /* DMA coherent memory pool for DMA descriptor allocations */ + ioat_dma->dma_pool = pci_pool_create("dma_desc_pool", pdev, + sizeof(struct ioat_dma_descriptor), + 64, 0); + if (!ioat_dma->dma_pool) { + err = -ENOMEM; + goto err_dma_pool; + } + + ioat_dma->completion_pool = pci_pool_create("completion_pool", pdev, + sizeof(u64), + SMP_CACHE_BYTES, + SMP_CACHE_BYTES); + + if (!ioat_dma->completion_pool) { + err = -ENOMEM; + goto err_completion_pool; + } + + ioat_enumerate_channels(ioat_dma); + + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + dma->dev = &pdev->dev; + + if (!dma->chancnt) { + dev_err(dev, "channel enumeration error\n"); + goto err_setup_interrupts; + } + + err = ioat_dma_setup_interrupts(ioat_dma); + if (err) + goto err_setup_interrupts; + + err = ioat3_dma_self_test(ioat_dma); + if (err) + goto err_self_test; + + return 0; + +err_self_test: + ioat_disable_interrupts(ioat_dma); +err_setup_interrupts: + pci_pool_destroy(ioat_dma->completion_pool); +err_completion_pool: + pci_pool_destroy(ioat_dma->dma_pool); +err_dma_pool: + return err; +} + +static int ioat_register(struct ioatdma_device *ioat_dma) +{ + int err = dma_async_device_register(&ioat_dma->dma_dev); + + if (err) { + ioat_disable_interrupts(ioat_dma); + pci_pool_destroy(ioat_dma->completion_pool); + pci_pool_destroy(ioat_dma->dma_pool); + } + + return err; +} + +static void ioat_dma_remove(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + + ioat_disable_interrupts(ioat_dma); + + ioat_kobject_del(ioat_dma); + + dma_async_device_unregister(dma); + + pci_pool_destroy(ioat_dma->dma_pool); + pci_pool_destroy(ioat_dma->completion_pool); + + INIT_LIST_HEAD(&dma->channels); +} + +/** + * ioat_enumerate_channels - find and initialize the device's channels + * @ioat_dma: the ioat dma device to be enumerated + */ +static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma) +{ + struct ioatdma_chan *ioat_chan; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_device *dma = &ioat_dma->dma_dev; + u8 xfercap_log; + int i; + + INIT_LIST_HEAD(&dma->channels); + dma->chancnt = readb(ioat_dma->reg_base + IOAT_CHANCNT_OFFSET); + dma->chancnt &= 0x1f; /* bits [4:0] valid */ + if (dma->chancnt > ARRAY_SIZE(ioat_dma->idx)) { + dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", + dma->chancnt, ARRAY_SIZE(ioat_dma->idx)); + dma->chancnt = ARRAY_SIZE(ioat_dma->idx); + } + xfercap_log = readb(ioat_dma->reg_base + IOAT_XFERCAP_OFFSET); + xfercap_log &= 0x1f; /* bits [4:0] valid */ + if (xfercap_log == 0) + return 0; + dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); + + for (i = 0; i < dma->chancnt; i++) { + ioat_chan = devm_kzalloc(dev, sizeof(*ioat_chan), GFP_KERNEL); + if (!ioat_chan) + break; + + ioat_init_channel(ioat_dma, ioat_chan, i); + ioat_chan->xfercap_log = xfercap_log; + spin_lock_init(&ioat_chan->prep_lock); + if (ioat_reset_hw(ioat_chan)) { + i = 0; + break; + } + } + dma->chancnt = i; + return i; +} + +/** + * ioat_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ +static void ioat_free_chan_resources(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + const int total_descs = 1 << ioat_chan->alloc_order; + int descs; + int i; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (!ioat_chan->ring) + return; + + ioat_stop(ioat_chan); + ioat_reset_hw(ioat_chan); + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->prep_lock); + descs = ioat_ring_space(ioat_chan); + dev_dbg(to_dev(ioat_chan), "freeing %d idle descriptors\n", descs); + for (i = 0; i < descs; i++) { + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head + i); + ioat_free_ring_ent(desc, c); + } + + if (descs < total_descs) + dev_err(to_dev(ioat_chan), "Freeing %d in use descriptors!\n", + total_descs - descs); + + for (i = 0; i < total_descs - descs; i++) { + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail + i); + dump_desc_dbg(ioat_chan, desc); + ioat_free_ring_ent(desc, c); + } + + kfree(ioat_chan->ring); + ioat_chan->ring = NULL; + ioat_chan->alloc_order = 0; + pci_pool_free(ioat_dma->completion_pool, ioat_chan->completion, + ioat_chan->completion_dma); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + + ioat_chan->last_completion = 0; + ioat_chan->completion_dma = 0; + ioat_chan->dmacount = 0; +} + +/* ioat_alloc_chan_resources - allocate/initialize ioat descriptor ring + * @chan: channel to be initialized + */ +static int ioat_alloc_chan_resources(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent **ring; + u64 status; + int order; + int i = 0; + u32 chanerr; + + /* have we already been set up? */ + if (ioat_chan->ring) + return 1 << ioat_chan->alloc_order; + + /* Setup register to interrupt and write completion status on error */ + writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + ioat_chan->completion = + pci_pool_alloc(ioat_chan->ioat_dma->completion_pool, + GFP_KERNEL, &ioat_chan->completion_dma); + if (!ioat_chan->completion) + return -ENOMEM; + + memset(ioat_chan->completion, 0, sizeof(*ioat_chan->completion)); + writel(((u64)ioat_chan->completion_dma) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64)ioat_chan->completion_dma) >> 32, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + order = ioat_get_alloc_order(); + ring = ioat_alloc_ring(c, order, GFP_KERNEL); + if (!ring) + return -ENOMEM; + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->prep_lock); + ioat_chan->ring = ring; + ioat_chan->head = 0; + ioat_chan->issued = 0; + ioat_chan->tail = 0; + ioat_chan->alloc_order = order; + set_bit(IOAT_RUN, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + + ioat_start_null_desc(ioat_chan); + + /* check that we got off the ground */ + do { + udelay(1); + status = ioat_chansts(ioat_chan); + } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status)); + + if (is_ioat_active(status) || is_ioat_idle(status)) + return 1 << ioat_chan->alloc_order; + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + + dev_WARN(to_dev(ioat_chan), + "failed to start channel chanerr: %#x\n", chanerr); + ioat_free_chan_resources(c); + return -EFAULT; +} + +/* common channel initialization */ +static void +ioat_init_channel(struct ioatdma_device *ioat_dma, + struct ioatdma_chan *ioat_chan, int idx) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + struct dma_chan *c = &ioat_chan->dma_chan; + unsigned long data = (unsigned long) c; + + ioat_chan->ioat_dma = ioat_dma; + ioat_chan->reg_base = ioat_dma->reg_base + (0x80 * (idx + 1)); + spin_lock_init(&ioat_chan->cleanup_lock); + ioat_chan->dma_chan.device = dma; + dma_cookie_init(&ioat_chan->dma_chan); + list_add_tail(&ioat_chan->dma_chan.device_node, &dma->channels); + ioat_dma->idx[idx] = ioat_chan; + init_timer(&ioat_chan->timer); + ioat_chan->timer.function = ioat_timer_event; + ioat_chan->timer.data = data; + tasklet_init(&ioat_chan->cleanup_task, ioat_cleanup_event, data); +} + +#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ +static int ioat_xor_val_self_test(struct ioatdma_device *ioat_dma) +{ + int i, src_idx; + struct page *dest; + struct page *xor_srcs[IOAT_NUM_SRC_TEST]; + struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dest_dma; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + u32 xor_val_result; + int err = 0; + struct completion cmp; + unsigned long tmo; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_device *dma = &ioat_dma->dma_dev; + u8 op = 0; + + dev_dbg(dev, "%s\n", __func__); + + if (!dma_has_cap(DMA_XOR, dma->cap_mask)) + return 0; + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + + /* Fill in src buffers */ + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + u8 *ptr = page_address(xor_srcs[src_idx]); + + for (i = 0; i < PAGE_SIZE; i++) + ptr[i] = (1 << src_idx); + } + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) + cmp_byte ^= (u8) (1 << src_idx); + + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | + (cmp_byte << 8) | cmp_byte; + + memset(page_address(dest), 0, PAGE_SIZE); + + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + /* test xor */ + op = IOAT_OP_XOR; + + dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dest_dma)) + goto dma_unmap; + + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + dma_srcs[i] = DMA_ERROR_CODE; + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) { + dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_srcs[i])) + goto dma_unmap; + } + tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs, + IOAT_NUM_SRC_TEST, PAGE_SIZE, + DMA_PREP_INTERRUPT); + + if (!tx) { + dev_err(dev, "Self-test xor prep failed\n"); + err = -ENODEV; + goto dma_unmap; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test xor setup failed\n"); + err = -ENODEV; + goto dma_unmap; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { + dev_err(dev, "Self-test xor timed out\n"); + err = -ENODEV; + goto dma_unmap; + } + + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + + dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { + u32 *ptr = page_address(dest); + + if (ptr[i] != cmp_word) { + dev_err(dev, "Self-test xor failed compare\n"); + err = -ENODEV; + goto free_resources; + } + } + dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + + dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + + /* skip validate if the capability is not present */ + if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) + goto free_resources; + + op = IOAT_OP_XOR_VAL; + + /* validate the sources with the destintation page */ + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + xor_val_srcs[i] = xor_srcs[i]; + xor_val_srcs[i] = dest; + + xor_val_result = 1; + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = DMA_ERROR_CODE; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_srcs[i])) + goto dma_unmap; + } + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test zero prep failed\n"); + err = -ENODEV; + goto dma_unmap; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test zero setup failed\n"); + err = -ENODEV; + goto dma_unmap; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { + dev_err(dev, "Self-test validate timed out\n"); + err = -ENODEV; + goto dma_unmap; + } + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + + if (xor_val_result != 0) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto free_resources; + } + + memset(page_address(dest), 0, PAGE_SIZE); + + /* test for non-zero parity sum */ + op = IOAT_OP_XOR_VAL; + + xor_val_result = 0; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = DMA_ERROR_CODE; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_srcs[i])) + goto dma_unmap; + } + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test 2nd zero prep failed\n"); + err = -ENODEV; + goto dma_unmap; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test 2nd zero setup failed\n"); + err = -ENODEV; + goto dma_unmap; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { + dev_err(dev, "Self-test 2nd validate timed out\n"); + err = -ENODEV; + goto dma_unmap; + } + + if (xor_val_result != SUM_CHECK_P_RESULT) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto dma_unmap; + } + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + + goto free_resources; +dma_unmap: + if (op == IOAT_OP_XOR) { + if (dest_dma != DMA_ERROR_CODE) + dma_unmap_page(dev, dest_dma, PAGE_SIZE, + DMA_FROM_DEVICE); + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + if (dma_srcs[i] != DMA_ERROR_CODE) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, + DMA_TO_DEVICE); + } else if (op == IOAT_OP_XOR_VAL) { + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + if (dma_srcs[i] != DMA_ERROR_CODE) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, + DMA_TO_DEVICE); + } +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + src_idx = IOAT_NUM_SRC_TEST; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma) +{ + int rc; + + rc = ioat_dma_self_test(ioat_dma); + if (rc) + return rc; + + rc = ioat_xor_val_self_test(ioat_dma); + + return rc; +} + +static void ioat_intr_quirk(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma; + struct dma_chan *c; + struct ioatdma_chan *ioat_chan; + u32 errmask; + + dma = &ioat_dma->dma_dev; + + /* + * if we have descriptor write back error status, we mask the + * error interrupts + */ + if (ioat_dma->cap & IOAT_CAP_DWBES) { + list_for_each_entry(c, &dma->channels, device_node) { + ioat_chan = to_ioat_chan(c); + errmask = readl(ioat_chan->reg_base + + IOAT_CHANERR_MASK_OFFSET); + errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR | + IOAT_CHANERR_XOR_Q_ERR; + writel(errmask, ioat_chan->reg_base + + IOAT_CHANERR_MASK_OFFSET); + } + } +} + +static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) +{ + struct pci_dev *pdev = ioat_dma->pdev; + int dca_en = system_has_dca_enabled(pdev); + struct dma_device *dma; + struct dma_chan *c; + struct ioatdma_chan *ioat_chan; + bool is_raid_device = false; + int err; + + dma = &ioat_dma->dma_dev; + dma->device_prep_dma_memcpy = ioat_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat_issue_pending; + dma->device_alloc_chan_resources = ioat_alloc_chan_resources; + dma->device_free_chan_resources = ioat_free_chan_resources; + + dma_cap_set(DMA_INTERRUPT, dma->cap_mask); + dma->device_prep_dma_interrupt = ioat_prep_interrupt_lock; + + ioat_dma->cap = readl(ioat_dma->reg_base + IOAT_DMA_CAP_OFFSET); + + if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev)) + ioat_dma->cap &= + ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS); + + /* dca is incompatible with raid operations */ + if (dca_en && (ioat_dma->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) + ioat_dma->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); + + if (ioat_dma->cap & IOAT_CAP_XOR) { + is_raid_device = true; + dma->max_xor = 8; + + dma_cap_set(DMA_XOR, dma->cap_mask); + dma->device_prep_dma_xor = ioat_prep_xor; + + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = ioat_prep_xor_val; + } + + if (ioat_dma->cap & IOAT_CAP_PQ) { + is_raid_device = true; + + dma->device_prep_dma_pq = ioat_prep_pq; + dma->device_prep_dma_pq_val = ioat_prep_pq_val; + dma_cap_set(DMA_PQ, dma->cap_mask); + dma_cap_set(DMA_PQ_VAL, dma->cap_mask); + + if (ioat_dma->cap & IOAT_CAP_RAID16SS) + dma_set_maxpq(dma, 16, 0); + else + dma_set_maxpq(dma, 8, 0); + + if (!(ioat_dma->cap & IOAT_CAP_XOR)) { + dma->device_prep_dma_xor = ioat_prep_pqxor; + dma->device_prep_dma_xor_val = ioat_prep_pqxor_val; + dma_cap_set(DMA_XOR, dma->cap_mask); + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + + if (ioat_dma->cap & IOAT_CAP_RAID16SS) + dma->max_xor = 16; + else + dma->max_xor = 8; + } + } + + dma->device_tx_status = ioat_tx_status; + + /* starting with CB3.3 super extended descriptors are supported */ + if (ioat_dma->cap & IOAT_CAP_RAID16SS) { + char pool_name[14]; + int i; + + for (i = 0; i < MAX_SED_POOLS; i++) { + snprintf(pool_name, 14, "ioat_hw%d_sed", i); + + /* allocate SED DMA pool */ + ioat_dma->sed_hw_pool[i] = dmam_pool_create(pool_name, + &pdev->dev, + SED_SIZE * (i + 1), 64, 0); + if (!ioat_dma->sed_hw_pool[i]) + return -ENOMEM; + + } + } + + if (!(ioat_dma->cap & (IOAT_CAP_XOR | IOAT_CAP_PQ))) + dma_cap_set(DMA_PRIVATE, dma->cap_mask); + + err = ioat_probe(ioat_dma); + if (err) + return err; + + list_for_each_entry(c, &dma->channels, device_node) { + ioat_chan = to_ioat_chan(c); + writel(IOAT_DMA_DCA_ANY_CPU, + ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(ioat_dma); + if (err) + return err; + + ioat_kobject_add(ioat_dma, &ioat_ktype); + + if (dca) + ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base); + + return 0; +} + +#define DRV_NAME "ioatdma" + +static struct pci_driver ioat_pci_driver = { + .name = DRV_NAME, + .id_table = ioat_pci_tbl, + .probe = ioat_pci_probe, + .remove = ioat_remove, +}; + +static struct ioatdma_device * +alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) +{ + struct device *dev = &pdev->dev; + struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); + + if (!d) + return NULL; + d->pdev = pdev; + d->reg_base = iobase; + return d; +} + +static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + void __iomem * const *iomap; + struct device *dev = &pdev->dev; + struct ioatdma_device *device; + int err; + + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); + if (err) + return err; + iomap = pcim_iomap_table(pdev); + if (!iomap) + return -ENOMEM; + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + return err; + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + return err; + + device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); + if (!device) + return -ENOMEM; + pci_set_master(pdev); + pci_set_drvdata(pdev, device); + + device->version = readb(device->reg_base + IOAT_VER_OFFSET); + if (device->version >= IOAT_VER_3_0) + err = ioat3_dma_probe(device, ioat_dca_enabled); + else + return -ENODEV; + + if (err) { + dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); + return -ENODEV; + } + + return 0; +} + +static void ioat_remove(struct pci_dev *pdev) +{ + struct ioatdma_device *device = pci_get_drvdata(pdev); + + if (!device) + return; + + dev_err(&pdev->dev, "Removing dma and dca services\n"); + if (device->dca) { + unregister_dca_provider(device->dca, &pdev->dev); + free_dca_provider(device->dca); + device->dca = NULL; + } + ioat_dma_remove(device); +} + +static int __init ioat_init_module(void) +{ + int err = -ENOMEM; + + pr_info("%s: Intel(R) QuickData Technology Driver %s\n", + DRV_NAME, IOAT_DMA_VERSION); + + ioat_cache = kmem_cache_create("ioat", sizeof(struct ioat_ring_ent), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ioat_cache) + return -ENOMEM; + + ioat_sed_cache = KMEM_CACHE(ioat_sed_ent, 0); + if (!ioat_sed_cache) + goto err_ioat_cache; + + err = pci_register_driver(&ioat_pci_driver); + if (err) + goto err_ioat3_cache; + + return 0; + + err_ioat3_cache: + kmem_cache_destroy(ioat_sed_cache); + + err_ioat_cache: + kmem_cache_destroy(ioat_cache); + + return err; +} +module_init(ioat_init_module); + +static void __exit ioat_exit_module(void) +{ + pci_unregister_driver(&ioat_pci_driver); + kmem_cache_destroy(ioat_cache); +} +module_exit(ioat_exit_module); diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c deleted file mode 100644 index 76f0dc688a19..000000000000 --- a/drivers/dma/ioat/pci.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2007 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -/* - * This driver supports an Intel I/OAT DMA engine, which does asynchronous - * copy operations. - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/dca.h> -#include <linux/slab.h> -#include "dma.h" -#include "dma_v2.h" -#include "registers.h" -#include "hw.h" - -MODULE_VERSION(IOAT_DMA_VERSION); -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Intel Corporation"); - -static struct pci_device_id ioat_pci_tbl[] = { - /* I/OAT v1 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, - { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, - - /* I/OAT v2 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, - - /* I/OAT v3 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, - - /* I/OAT v3.2 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) }, - - /* I/OAT v3.3 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE3) }, - - { 0, } -}; -MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); - -static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id); -static void ioat_remove(struct pci_dev *pdev); - -static int ioat_dca_enabled = 1; -module_param(ioat_dca_enabled, int, 0644); -MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); - -struct kmem_cache *ioat2_cache; -struct kmem_cache *ioat3_sed_cache; - -#define DRV_NAME "ioatdma" - -static struct pci_driver ioat_pci_driver = { - .name = DRV_NAME, - .id_table = ioat_pci_tbl, - .probe = ioat_pci_probe, - .remove = ioat_remove, -}; - -static struct ioatdma_device * -alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) -{ - struct device *dev = &pdev->dev; - struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); - - if (!d) - return NULL; - d->pdev = pdev; - d->reg_base = iobase; - return d; -} - -static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) -{ - void __iomem * const *iomap; - struct device *dev = &pdev->dev; - struct ioatdma_device *device; - int err; - - err = pcim_enable_device(pdev); - if (err) - return err; - - err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); - if (err) - return err; - iomap = pcim_iomap_table(pdev); - if (!iomap) - return -ENOMEM; - - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - return err; - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - return err; - - device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); - if (!device) - return -ENOMEM; - pci_set_master(pdev); - pci_set_drvdata(pdev, device); - - device->version = readb(device->reg_base + IOAT_VER_OFFSET); - if (device->version == IOAT_VER_1_2) - err = ioat1_dma_probe(device, ioat_dca_enabled); - else if (device->version == IOAT_VER_2_0) - err = ioat2_dma_probe(device, ioat_dca_enabled); - else if (device->version >= IOAT_VER_3_0) - err = ioat3_dma_probe(device, ioat_dca_enabled); - else - return -ENODEV; - - if (err) { - dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); - return -ENODEV; - } - - return 0; -} - -static void ioat_remove(struct pci_dev *pdev) -{ - struct ioatdma_device *device = pci_get_drvdata(pdev); - - if (!device) - return; - - dev_err(&pdev->dev, "Removing dma and dca services\n"); - if (device->dca) { - unregister_dca_provider(device->dca, &pdev->dev); - free_dca_provider(device->dca); - device->dca = NULL; - } - ioat_dma_remove(device); -} - -static int __init ioat_init_module(void) -{ - int err = -ENOMEM; - - pr_info("%s: Intel(R) QuickData Technology Driver %s\n", - DRV_NAME, IOAT_DMA_VERSION); - - ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!ioat2_cache) - return -ENOMEM; - - ioat3_sed_cache = KMEM_CACHE(ioat_sed_ent, 0); - if (!ioat3_sed_cache) - goto err_ioat2_cache; - - err = pci_register_driver(&ioat_pci_driver); - if (err) - goto err_ioat3_cache; - - return 0; - - err_ioat3_cache: - kmem_cache_destroy(ioat3_sed_cache); - - err_ioat2_cache: - kmem_cache_destroy(ioat2_cache); - - return err; -} -module_init(ioat_init_module); - -static void __exit ioat_exit_module(void) -{ - pci_unregister_driver(&ioat_pci_driver); - kmem_cache_destroy(ioat2_cache); -} -module_exit(ioat_exit_module); diff --git a/drivers/dma/ioat/prep.c b/drivers/dma/ioat/prep.c new file mode 100644 index 000000000000..ad4fb41cd23b --- /dev/null +++ b/drivers/dma/ioat/prep.c @@ -0,0 +1,715 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/gfp.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/prefetch.h> +#include "../dmaengine.h" +#include "registers.h" +#include "hw.h" +#include "dma.h" + +#define MAX_SCF 1024 + +/* provide a lookup table for setting the source address in the base or + * extended descriptor of an xor or pq descriptor + */ +static const u8 xor_idx_to_desc = 0xe0; +static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; +static const u8 pq_idx_to_desc = 0xf8; +static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2 }; +static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; +static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6 }; + +static void xor_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, int idx) +{ + struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; + + raw->field[xor_idx_to_field[idx]] = addr + offset; +} + +static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) +{ + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + return raw->field[pq_idx_to_field[idx]]; +} + +static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) +{ + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + return raw->field[pq16_idx_to_field[idx]]; +} + +static void pq_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, u8 coef, int idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + raw->field[pq_idx_to_field[idx]] = addr + offset; + pq->coef[idx] = coef; +} + +static void pq16_set_src(struct ioat_raw_descriptor *desc[3], + dma_addr_t addr, u32 offset, u8 coef, unsigned idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0]; + struct ioat_pq16a_descriptor *pq16 = + (struct ioat_pq16a_descriptor *)desc[1]; + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + raw->field[pq16_idx_to_field[idx]] = addr + offset; + + if (idx < 8) + pq->coef[idx] = coef; + else + pq16->coef[idx - 8] = coef; +} + +static struct ioat_sed_ent * +ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool) +{ + struct ioat_sed_ent *sed; + gfp_t flags = __GFP_ZERO | GFP_ATOMIC; + + sed = kmem_cache_alloc(ioat_sed_cache, flags); + if (!sed) + return NULL; + + sed->hw_pool = hw_pool; + sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool], + flags, &sed->dma); + if (!sed->hw) { + kmem_cache_free(ioat_sed_cache, sed); + return NULL; + } + + return sed; +} + +struct dma_async_tx_descriptor * +ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + dma_addr_t dst = dma_dest; + dma_addr_t src = dma_src; + size_t total_len = len; + int num_descs, idx, i; + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + hw = desc->hw; + + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + + len -= copy; + dst += copy; + src += copy; + dump_desc_dbg(ioat_chan, desc); + } while (++i < num_descs); + + desc->txd.flags = flags; + desc->len = total_len; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + dump_desc_dbg(ioat_chan, desc); + /* we leave the channel locked to ensure in order submission */ + + return &desc->txd; +} + + +static struct dma_async_tx_descriptor * +__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, + dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex = NULL; + struct ioat_dma_descriptor *hw; + int num_descs, with_ext, idx, i; + u32 offset = 0; + u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; + + BUG_ON(src_cnt < 2); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + /* we need 2x the number of descriptors to cover greater than 5 + * sources + */ + if (src_cnt > 5) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs+1) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, + len, 1 << ioat_chan->xfercap_log); + int s; + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + xor = desc->xor; + + /* save a branch by unconditionally retrieving the + * extended descriptor xor_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat_get_ring_ent(ioat_chan, idx + i + 1); + xor_ex = ext->xor_ex; + + descs[0] = (struct ioat_raw_descriptor *) xor; + descs[1] = (struct ioat_raw_descriptor *) xor_ex; + for (s = 0; s < src_cnt; s++) + xor_set_src(descs, src[s], offset, s); + xor->size = xfer_size; + xor->dst_addr = dest + offset; + xor->ctl = 0; + xor->ctl_f.op = op; + xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); + + len -= xfer_size; + offset += xfer_size; + dump_desc_dbg(ioat_chan, desc); + } while ((i += 1 + with_ext) < num_descs); + + /* last xor descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* completion descriptor carries interrupt bit */ + compl_desc = ioat_get_ring_ent(ioat_chan, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat_chan, compl_desc); + + /* we leave the channel locked to ensure in order submission */ + return &compl_desc->txd; +} + +struct dma_async_tx_descriptor * +ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + return __ioat_prep_xor_lock(chan, result, src[0], &src[1], + src_cnt - 1, len, flags); +} + +static void +dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc, + struct ioat_ring_ent *ext) +{ + struct device *dev = to_dev(ioat_chan); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; + struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; + int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) (pq_ex ? pq_ex->next : pq->next), + desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, + pq->ctl_f.int_en, pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq_get_src(descs, i), pq->coef[i]); + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); + dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); +} + +static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan, + struct ioat_ring_ent *desc) +{ + struct device *dev = to_dev(ioat_chan); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_raw_descriptor *descs[] = { (void *)pq, + (void *)pq, + (void *)pq }; + int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + if (desc->sed) { + descs[1] = (void *)desc->sed->hw; + descs[2] = (void *)desc->sed->hw + 64; + } + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) pq->next, + desc->txd.flags, pq->size, pq->ctl, + pq->ctl_f.op, pq->ctl_f.int_en, + pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) { + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq16_get_src(descs, i), + pq->coef[i]); + } + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); +} + +static struct dma_async_tx_descriptor * +__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex = NULL; + struct ioat_dma_descriptor *hw; + u32 offset = 0; + u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; + int i, s, idx, with_ext, num_descs; + int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0; + + dev_dbg(to_dev(ioat_chan), "%s\n", __func__); + /* the engine requires at least two sources (we provide + * at least 1 implied source in the DMA_PREP_CONTINUE case) + */ + BUG_ON(src_cnt + dmaf_continue(flags) < 2); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + /* we need 2x the number of descriptors to cover greater than 3 + * sources (we need 1 extra source in the q-only continuation + * case and 3 extra sources in the p+q continuation case. + */ + if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || + (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, len, + 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + pq = desc->pq; + + /* save a branch by unconditionally retrieving the + * extended descriptor pq_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext); + pq_ex = ext->pq_ex; + + descs[0] = (struct ioat_raw_descriptor *) pq; + descs[1] = (struct ioat_raw_descriptor *) pq_ex; + + for (s = 0; s < src_cnt; s++) + pq_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq_set_src(descs, dst[0], offset, 0, s++); + pq_set_src(descs, dst[1], offset, 1, s++); + pq_set_src(descs, dst[1], offset, 0, s++); + } + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + /* we turn on descriptor write back error status */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 1 : 0; + pq->ctl_f.src_cnt = src_cnt_to_hw(s); + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while ((i += 1 + with_ext) < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + dump_pq_desc_dbg(ioat_chan, desc, ext); + + if (!cb32) { + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + compl_desc = desc; + } else { + /* completion descriptor carries interrupt bit */ + compl_desc = ioat_get_ring_ent(ioat_chan, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat_chan, compl_desc); + } + + + /* we leave the channel locked to ensure in order submission */ + return &compl_desc->txd; +} + +static struct dma_async_tx_descriptor * +__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + u32 offset = 0; + u8 op; + int i, s, idx, num_descs; + + /* this function is only called with 9-16 sources */ + op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; + + dev_dbg(to_dev(ioat_chan), "%s\n", __func__); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + + /* + * 16 source pq is only available on cb3.3 and has no completion + * write hw bug. + */ + if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0) + idx = ioat_chan->head; + else + return NULL; + + i = 0; + + do { + struct ioat_raw_descriptor *descs[4]; + size_t xfer_size = min_t(size_t, len, + 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + pq = desc->pq; + + descs[0] = (struct ioat_raw_descriptor *) pq; + + desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3); + if (!desc->sed) { + dev_err(to_dev(ioat_chan), + "%s: no free sed entries\n", __func__); + return NULL; + } + + pq->sed_addr = desc->sed->dma; + desc->sed->parent = desc; + + descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; + descs[2] = (void *)descs[1] + 64; + + for (s = 0; s < src_cnt; s++) + pq16_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq16_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq16_set_src(descs, dst[0], offset, 0, s++); + pq16_set_src(descs, dst[1], offset, 1, s++); + pq16_set_src(descs, dst[1], offset, 0, s++); + } + + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + pq->ctl_f.src_cnt = src16_cnt_to_hw(s); + /* we turn on descriptor write back error status */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 1 : 0; + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while (++i < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* with cb3.3 we should be able to do completion w/o a null desc */ + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + + dump_pq16_desc_dbg(ioat_chan, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static int src_cnt_flags(unsigned int src_cnt, unsigned long flags) +{ + if (dmaf_p_disabled_continue(flags)) + return src_cnt + 1; + else if (dmaf_continue(flags)) + return src_cnt + 3; + else + return src_cnt; +} + +struct dma_async_tx_descriptor * +ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + dst[0] = dst[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + dst[1] = dst[0]; + + /* handle the single source multiply case from the raid6 + * recovery path + */ + if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) { + dma_addr_t single_source[2]; + unsigned char single_source_coef[2]; + + BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); + single_source[0] = src[0]; + single_source[1] = src[0]; + single_source_coef[0] = scf[0]; + single_source_coef[1] = 0; + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, dst, single_source, + 2, single_source_coef, len, + flags) : + __ioat_prep_pq_lock(chan, NULL, dst, single_source, 2, + single_source_coef, len, flags); + + } else { + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags) : + __ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags); + } +} + +struct dma_async_tx_descriptor * +ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags) +{ + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + pq[0] = pq[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + pq[1] = pq[0]; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *pqres = 0; + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags) : + __ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + unsigned char scf[MAX_SCF]; + dma_addr_t pq[2]; + + if (src_cnt > MAX_SCF) + return NULL; + + memset(scf, 0, src_cnt); + pq[0] = dst; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = dst; /* specify valid address for disabled result */ + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags) : + __ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + unsigned char scf[MAX_SCF]; + dma_addr_t pq[2]; + + if (src_cnt > MAX_SCF) + return NULL; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + memset(scf, 0, src_cnt); + pq[0] = src[0]; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = pq[0]; /* specify valid address for disabled result */ + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags) : + __ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + + if (ioat_check_space_lock(ioat_chan, 1) == 0) + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head); + else + return NULL; + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + + desc->txd.flags = flags; + desc->len = 1; + + dump_desc_dbg(ioat_chan, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + diff --git a/drivers/dma/ioat/sysfs.c b/drivers/dma/ioat/sysfs.c new file mode 100644 index 000000000000..cb4a857ee21b --- /dev/null +++ b/drivers/dma/ioat/sysfs.c @@ -0,0 +1,135 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/dmaengine.h> +#include <linux/pci.h> +#include "dma.h" +#include "registers.h" +#include "hw.h" + +#include "../dmaengine.h" + +static ssize_t cap_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + + return sprintf(page, "copy%s%s%s%s%s\n", + dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", + dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", + dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", + dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", + dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : ""); + +} +struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); + +static ssize_t version_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + struct ioatdma_device *ioat_dma = to_ioatdma_device(dma); + + return sprintf(page, "%d.%d\n", + ioat_dma->version >> 4, ioat_dma->version & 0xf); +} +struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); + +static ssize_t +ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + struct ioat_sysfs_entry *entry; + struct ioatdma_chan *ioat_chan; + + entry = container_of(attr, struct ioat_sysfs_entry, attr); + ioat_chan = container_of(kobj, struct ioatdma_chan, kobj); + + if (!entry->show) + return -EIO; + return entry->show(&ioat_chan->dma_chan, page); +} + +const struct sysfs_ops ioat_sysfs_ops = { + .show = ioat_attr_show, +}; + +void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct kobject *parent = &c->dev->device.kobj; + int err; + + err = kobject_init_and_add(&ioat_chan->kobj, type, + parent, "quickdata"); + if (err) { + dev_warn(to_dev(ioat_chan), + "sysfs init error (%d), continuing...\n", err); + kobject_put(&ioat_chan->kobj); + set_bit(IOAT_KOBJ_INIT_FAIL, &ioat_chan->state); + } + } +} + +void ioat_kobject_del(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + if (!test_bit(IOAT_KOBJ_INIT_FAIL, &ioat_chan->state)) { + kobject_del(&ioat_chan->kobj); + kobject_put(&ioat_chan->kobj); + } + } +} + +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + return sprintf(page, "%d\n", (1 << ioat_chan->alloc_order) & ~1); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + /* ...taken outside the lock, no need to be precise */ + return sprintf(page, "%d\n", ioat_ring_active(ioat_chan)); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static struct attribute *ioat_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +struct kobj_type ioat_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat_attrs, +}; diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c index 2e284a4438bc..4768a829253a 100644 --- a/drivers/dma/ipu/ipu_irq.c +++ b/drivers/dma/ipu/ipu_irq.c @@ -265,10 +265,10 @@ int ipu_irq_unmap(unsigned int source) return ret; } -/* Chained IRQ handler for IPU error interrupt */ -static void ipu_irq_err(unsigned int irq, struct irq_desc *desc) +/* Chained IRQ handler for IPU function and error interrupt */ +static void ipu_irq_handler(unsigned int __irq, struct irq_desc *desc) { - struct ipu *ipu = irq_get_handler_data(irq); + struct ipu *ipu = irq_desc_get_handler_data(desc); u32 status; int i, line; @@ -286,43 +286,7 @@ static void ipu_irq_err(unsigned int irq, struct irq_desc *desc) raw_spin_unlock(&bank_lock); while ((line = ffs(status))) { struct ipu_irq_map *map; - - line--; - status &= ~(1UL << line); - - raw_spin_lock(&bank_lock); - map = src2map(32 * i + line); - if (map) - irq = map->irq; - raw_spin_unlock(&bank_lock); - - if (!map) { - pr_err("IPU: Interrupt on unmapped source %u bank %d\n", - line, i); - continue; - } - generic_handle_irq(irq); - } - } -} - -/* Chained IRQ handler for IPU function interrupt */ -static void ipu_irq_fn(unsigned int irq, struct irq_desc *desc) -{ - struct ipu *ipu = irq_desc_get_handler_data(desc); - u32 status; - int i, line; - - for (i = 0; i < IPU_IRQ_NR_FN_BANKS; i++) { - struct ipu_irq_bank *bank = irq_bank + i; - - raw_spin_lock(&bank_lock); - status = ipu_read_reg(ipu, bank->status); - /* Not clearing all interrupts, see above */ - status &= ipu_read_reg(ipu, bank->control); - raw_spin_unlock(&bank_lock); - while ((line = ffs(status))) { - struct ipu_irq_map *map; + unsigned int irq = NO_IRQ; line--; status &= ~(1UL << line); @@ -377,16 +341,12 @@ int __init ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev) irq_map[i].irq = irq; irq_map[i].source = -EINVAL; irq_set_handler(irq, handle_level_irq); -#ifdef CONFIG_ARM - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); -#endif + irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE); } - irq_set_handler_data(ipu->irq_fn, ipu); - irq_set_chained_handler(ipu->irq_fn, ipu_irq_fn); + irq_set_chained_handler_and_data(ipu->irq_fn, ipu_irq_handler, ipu); - irq_set_handler_data(ipu->irq_err, ipu); - irq_set_chained_handler(ipu->irq_err, ipu_irq_err); + irq_set_chained_handler_and_data(ipu->irq_err, ipu_irq_handler, ipu); ipu->irq_base = irq_base; @@ -399,16 +359,12 @@ void ipu_irq_detach_irq(struct ipu *ipu, struct platform_device *dev) irq_base = ipu->irq_base; - irq_set_chained_handler(ipu->irq_fn, NULL); - irq_set_handler_data(ipu->irq_fn, NULL); + irq_set_chained_handler_and_data(ipu->irq_fn, NULL, NULL); - irq_set_chained_handler(ipu->irq_err, NULL); - irq_set_handler_data(ipu->irq_err, NULL); + irq_set_chained_handler_and_data(ipu->irq_err, NULL, NULL); for (irq = irq_base; irq < irq_base + CONFIG_MX3_IPU_IRQS; irq++) { -#ifdef CONFIG_ARM - set_irq_flags(irq, 0); -#endif + irq_set_status_flags(irq, IRQ_NOREQUEST); irq_set_chip(irq, NULL); irq_set_chip_data(irq, NULL); } diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c index 647e362f01fd..1ba2fd73852d 100644 --- a/drivers/dma/k3dma.c +++ b/drivers/dma/k3dma.c @@ -24,7 +24,6 @@ #include "virt-dma.h" #define DRIVER_NAME "k3-dma" -#define DMA_ALIGN 3 #define DMA_MAX_SIZE 0x1ffc #define INT_STAT 0x00 @@ -732,7 +731,7 @@ static int k3_dma_probe(struct platform_device *op) d->slave.device_pause = k3_dma_transfer_pause; d->slave.device_resume = k3_dma_transfer_resume; d->slave.device_terminate_all = k3_dma_terminate_all; - d->slave.copy_align = DMA_ALIGN; + d->slave.copy_align = DMAENGINE_ALIGN_8_BYTES; /* init virtual channel */ d->chans = devm_kzalloc(&op->dev, diff --git a/drivers/dma/lpc18xx-dmamux.c b/drivers/dma/lpc18xx-dmamux.c new file mode 100644 index 000000000000..761f32687055 --- /dev/null +++ b/drivers/dma/lpc18xx-dmamux.c @@ -0,0 +1,183 @@ +/* + * DMA Router driver for LPC18xx/43xx DMA MUX + * + * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com> + * + * Based on TI DMA Crossbar driver by: + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi <peter.ujfalusi@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/err.h> +#include <linux/init.h> +#include <linux/mfd/syscon.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> +#include <linux/regmap.h> +#include <linux/spinlock.h> + +/* CREG register offset and macros for mux manipulation */ +#define LPC18XX_CREG_DMAMUX 0x11c +#define LPC18XX_DMAMUX_VAL(v, n) ((v) << (n * 2)) +#define LPC18XX_DMAMUX_MASK(n) (0x3 << (n * 2)) +#define LPC18XX_DMAMUX_MAX_VAL 0x3 + +struct lpc18xx_dmamux { + u32 value; + bool busy; +}; + +struct lpc18xx_dmamux_data { + struct dma_router dmarouter; + struct lpc18xx_dmamux *muxes; + u32 dma_master_requests; + u32 dma_mux_requests; + struct regmap *reg; + spinlock_t lock; +}; + +static void lpc18xx_dmamux_free(struct device *dev, void *route_data) +{ + struct lpc18xx_dmamux_data *dmamux = dev_get_drvdata(dev); + struct lpc18xx_dmamux *mux = route_data; + unsigned long flags; + + spin_lock_irqsave(&dmamux->lock, flags); + mux->busy = false; + spin_unlock_irqrestore(&dmamux->lock, flags); +} + +static void *lpc18xx_dmamux_reserve(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct lpc18xx_dmamux_data *dmamux = platform_get_drvdata(pdev); + unsigned long flags; + unsigned mux; + + if (dma_spec->args_count != 3) { + dev_err(&pdev->dev, "invalid number of dma mux args\n"); + return ERR_PTR(-EINVAL); + } + + mux = dma_spec->args[0]; + if (mux >= dmamux->dma_master_requests) { + dev_err(&pdev->dev, "invalid mux number: %d\n", + dma_spec->args[0]); + return ERR_PTR(-EINVAL); + } + + if (dma_spec->args[1] > LPC18XX_DMAMUX_MAX_VAL) { + dev_err(&pdev->dev, "invalid dma mux value: %d\n", + dma_spec->args[1]); + return ERR_PTR(-EINVAL); + } + + /* The of_node_put() will be done in the core for the node */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "can't get dma master\n"); + return ERR_PTR(-EINVAL); + } + + spin_lock_irqsave(&dmamux->lock, flags); + if (dmamux->muxes[mux].busy) { + spin_unlock_irqrestore(&dmamux->lock, flags); + dev_err(&pdev->dev, "dma request %u busy with %u.%u\n", + mux, mux, dmamux->muxes[mux].value); + of_node_put(dma_spec->np); + return ERR_PTR(-EBUSY); + } + + dmamux->muxes[mux].busy = true; + dmamux->muxes[mux].value = dma_spec->args[1]; + + regmap_update_bits(dmamux->reg, LPC18XX_CREG_DMAMUX, + LPC18XX_DMAMUX_MASK(mux), + LPC18XX_DMAMUX_VAL(dmamux->muxes[mux].value, mux)); + spin_unlock_irqrestore(&dmamux->lock, flags); + + dma_spec->args[1] = dma_spec->args[2]; + dma_spec->args_count = 2; + + dev_dbg(&pdev->dev, "mapping dmamux %u.%u to dma request %u\n", mux, + dmamux->muxes[mux].value, mux); + + return &dmamux->muxes[mux]; +} + +static int lpc18xx_dmamux_probe(struct platform_device *pdev) +{ + struct device_node *dma_np, *np = pdev->dev.of_node; + struct lpc18xx_dmamux_data *dmamux; + int ret; + + dmamux = devm_kzalloc(&pdev->dev, sizeof(*dmamux), GFP_KERNEL); + if (!dmamux) + return -ENOMEM; + + dmamux->reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg"); + if (IS_ERR(dmamux->reg)) { + dev_err(&pdev->dev, "syscon lookup failed\n"); + return PTR_ERR(dmamux->reg); + } + + ret = of_property_read_u32(np, "dma-requests", + &dmamux->dma_mux_requests); + if (ret) { + dev_err(&pdev->dev, "missing dma-requests property\n"); + return ret; + } + + dma_np = of_parse_phandle(np, "dma-masters", 0); + if (!dma_np) { + dev_err(&pdev->dev, "can't get dma master\n"); + return -ENODEV; + } + + ret = of_property_read_u32(dma_np, "dma-requests", + &dmamux->dma_master_requests); + of_node_put(dma_np); + if (ret) { + dev_err(&pdev->dev, "missing master dma-requests property\n"); + return ret; + } + + dmamux->muxes = devm_kcalloc(&pdev->dev, dmamux->dma_master_requests, + sizeof(struct lpc18xx_dmamux), + GFP_KERNEL); + if (!dmamux->muxes) + return -ENOMEM; + + spin_lock_init(&dmamux->lock); + platform_set_drvdata(pdev, dmamux); + dmamux->dmarouter.dev = &pdev->dev; + dmamux->dmarouter.route_free = lpc18xx_dmamux_free; + + return of_dma_router_register(np, lpc18xx_dmamux_reserve, + &dmamux->dmarouter); +} + +static const struct of_device_id lpc18xx_dmamux_match[] = { + { .compatible = "nxp,lpc1850-dmamux" }, + {}, +}; + +static struct platform_driver lpc18xx_dmamux_driver = { + .probe = lpc18xx_dmamux_probe, + .driver = { + .name = "lpc18xx-dmamux", + .of_match_table = lpc18xx_dmamux_match, + }, +}; + +static int __init lpc18xx_dmamux_init(void) +{ + return platform_driver_register(&lpc18xx_dmamux_driver); +} +arch_initcall(lpc18xx_dmamux_init); diff --git a/drivers/dma/mic_x100_dma.h b/drivers/dma/mic_x100_dma.h index f663b0bdd11d..d89982034e68 100644 --- a/drivers/dma/mic_x100_dma.h +++ b/drivers/dma/mic_x100_dma.h @@ -39,7 +39,7 @@ */ #define MIC_DMA_MAX_NUM_CHAN 8 #define MIC_DMA_NUM_CHAN 4 -#define MIC_DMA_ALIGN_SHIFT 6 +#define MIC_DMA_ALIGN_SHIFT DMAENGINE_ALIGN_64_BYTES #define MIC_DMA_ALIGN_BYTES (1 << MIC_DMA_ALIGN_SHIFT) #define MIC_DMA_DESC_RX_SIZE (128 * 1024 - 4) diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c index 462a0229a743..e39457f13d4d 100644 --- a/drivers/dma/mmp_pdma.c +++ b/drivers/dma/mmp_pdma.c @@ -72,7 +72,6 @@ #define DCMD_WIDTH4 (3 << 14) /* 4 byte width (Word) */ #define DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */ -#define PDMA_ALIGNMENT 3 #define PDMA_MAX_DESC_BYTES DCMD_LENGTH struct mmp_pdma_desc_hw { @@ -1071,7 +1070,7 @@ static int mmp_pdma_probe(struct platform_device *op) pdev->device.device_issue_pending = mmp_pdma_issue_pending; pdev->device.device_config = mmp_pdma_config; pdev->device.device_terminate_all = mmp_pdma_terminate_all; - pdev->device.copy_align = PDMA_ALIGNMENT; + pdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES; pdev->device.src_addr_widths = widths; pdev->device.dst_addr_widths = widths; pdev->device.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c index e683761e0f8f..3df0422607d5 100644 --- a/drivers/dma/mmp_tdma.c +++ b/drivers/dma/mmp_tdma.c @@ -100,7 +100,6 @@ enum mmp_tdma_type { PXA910_SQU, }; -#define TDMA_ALIGNMENT 3 #define TDMA_MAX_XFER_BYTES SZ_64K struct mmp_tdma_chan { @@ -695,7 +694,7 @@ static int mmp_tdma_probe(struct platform_device *pdev) tdev->device.device_pause = mmp_tdma_pause_chan; tdev->device.device_resume = mmp_tdma_resume_chan; tdev->device.device_terminate_all = mmp_tdma_terminate_all; - tdev->device.copy_align = TDMA_ALIGNMENT; + tdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES; dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); platform_set_drvdata(pdev, tdev); diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index f1325f62563e..1c2de9a834a9 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -13,7 +13,6 @@ */ #include <linux/init.h> -#include <linux/module.h> #include <linux/slab.h> #include <linux/delay.h> #include <linux/dma-mapping.h> @@ -26,6 +25,7 @@ #include <linux/of.h> #include <linux/of_irq.h> #include <linux/irqdomain.h> +#include <linux/cpumask.h> #include <linux/platform_data/dma-mv_xor.h> #include "dmaengine.h" @@ -1126,7 +1126,8 @@ static const struct of_device_id mv_xor_dt_ids[] = { { .compatible = "marvell,armada-380-xor", .data = (void *)XOR_MODE_IN_DESC }, {}, }; -MODULE_DEVICE_TABLE(of, mv_xor_dt_ids); + +static unsigned int mv_xor_engine_count; static int mv_xor_probe(struct platform_device *pdev) { @@ -1134,6 +1135,7 @@ static int mv_xor_probe(struct platform_device *pdev) struct mv_xor_device *xordev; struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev); struct resource *res; + unsigned int max_engines, max_channels; int i, ret; int op_in_desc; @@ -1177,6 +1179,21 @@ static int mv_xor_probe(struct platform_device *pdev) if (!IS_ERR(xordev->clk)) clk_prepare_enable(xordev->clk); + /* + * We don't want to have more than one channel per CPU in + * order for async_tx to perform well. So we limit the number + * of engines and channels so that we take into account this + * constraint. Note that we also want to use channels from + * separate engines when possible. + */ + max_engines = num_present_cpus(); + max_channels = min_t(unsigned int, + MV_XOR_MAX_CHANNELS, + DIV_ROUND_UP(num_present_cpus(), 2)); + + if (mv_xor_engine_count >= max_engines) + return 0; + if (pdev->dev.of_node) { struct device_node *np; int i = 0; @@ -1190,13 +1207,13 @@ static int mv_xor_probe(struct platform_device *pdev) int irq; op_in_desc = (int)of_id->data; + if (i >= max_channels) + continue; + dma_cap_zero(cap_mask); - if (of_property_read_bool(np, "dmacap,memcpy")) - dma_cap_set(DMA_MEMCPY, cap_mask); - if (of_property_read_bool(np, "dmacap,xor")) - dma_cap_set(DMA_XOR, cap_mask); - if (of_property_read_bool(np, "dmacap,interrupt")) - dma_cap_set(DMA_INTERRUPT, cap_mask); + dma_cap_set(DMA_MEMCPY, cap_mask); + dma_cap_set(DMA_XOR, cap_mask); + dma_cap_set(DMA_INTERRUPT, cap_mask); irq = irq_of_parse_and_map(np, 0); if (!irq) { @@ -1216,7 +1233,7 @@ static int mv_xor_probe(struct platform_device *pdev) i++; } } else if (pdata && pdata->channels) { - for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) { + for (i = 0; i < max_channels; i++) { struct mv_xor_channel_data *cd; struct mv_xor_chan *chan; int irq; @@ -1263,27 +1280,8 @@ err_channel_add: return ret; } -static int mv_xor_remove(struct platform_device *pdev) -{ - struct mv_xor_device *xordev = platform_get_drvdata(pdev); - int i; - - for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) { - if (xordev->channels[i]) - mv_xor_channel_remove(xordev->channels[i]); - } - - if (!IS_ERR(xordev->clk)) { - clk_disable_unprepare(xordev->clk); - clk_put(xordev->clk); - } - - return 0; -} - static struct platform_driver mv_xor_driver = { .probe = mv_xor_probe, - .remove = mv_xor_remove, .driver = { .name = MV_XOR_NAME, .of_match_table = of_match_ptr(mv_xor_dt_ids), @@ -1295,19 +1293,10 @@ static int __init mv_xor_init(void) { return platform_driver_register(&mv_xor_driver); } -module_init(mv_xor_init); - -/* it's currently unsafe to unload this module */ -#if 0 -static void __exit mv_xor_exit(void) -{ - platform_driver_unregister(&mv_xor_driver); - return; -} - -module_exit(mv_xor_exit); -#endif +device_initcall(mv_xor_init); +/* MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>"); MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine"); MODULE_LICENSE("GPL"); +*/ diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index b859792dde95..113605f6fe20 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -11,10 +11,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/dmaengine.h> diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index ecab4ea059b4..17ee758b419f 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -1198,6 +1198,9 @@ static inline int _loop(unsigned dry_run, u8 buf[], unsigned lcnt0, lcnt1, ljmp0, ljmp1; struct _arg_LPEND lpend; + if (*bursts == 1) + return _bursts(dry_run, buf, pxs, 1); + /* Max iterations possible in DMALP is 256 */ if (*bursts >= 256*256) { lcnt1 = 256; diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index ddcbbf5cd9e9..5cb61ce01036 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -184,19 +184,18 @@ static unsigned int pxad_drcmr(unsigned int line) static int dbg_show_requester_chan(struct seq_file *s, void *p) { - int pos = 0; struct pxad_phy *phy = s->private; int i; u32 drcmr; - pos += seq_printf(s, "DMA channel %d requester :\n", phy->idx); + seq_printf(s, "DMA channel %d requester :\n", phy->idx); for (i = 0; i < 70; i++) { drcmr = readl_relaxed(phy->base + pxad_drcmr(i)); if ((drcmr & DRCMR_CHLNUM) == phy->idx) - pos += seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i, - !!(drcmr & DRCMR_MAPVLD)); + seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i, + !!(drcmr & DRCMR_MAPVLD)); } - return pos; + return 0; } static inline int dbg_burst_from_dcmd(u32 dcmd) @@ -906,21 +905,21 @@ static void pxad_get_config(struct pxad_chan *chan, enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED; *dcmd = 0; - if (chan->cfg.direction == DMA_DEV_TO_MEM) { + if (dir == DMA_DEV_TO_MEM) { maxburst = chan->cfg.src_maxburst; width = chan->cfg.src_addr_width; dev_addr = chan->cfg.src_addr; *dev_src = dev_addr; *dcmd |= PXA_DCMD_INCTRGADDR | PXA_DCMD_FLOWSRC; } - if (chan->cfg.direction == DMA_MEM_TO_DEV) { + if (dir == DMA_MEM_TO_DEV) { maxburst = chan->cfg.dst_maxburst; width = chan->cfg.dst_addr_width; dev_addr = chan->cfg.dst_addr; *dev_dst = dev_addr; *dcmd |= PXA_DCMD_INCSRCADDR | PXA_DCMD_FLOWTRG; } - if (chan->cfg.direction == DMA_MEM_TO_MEM) + if (dir == DMA_MEM_TO_MEM) *dcmd |= PXA_DCMD_BURST32 | PXA_DCMD_INCTRGADDR | PXA_DCMD_INCSRCADDR; diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig index 0f371524a4d9..9fda65af841e 100644 --- a/drivers/dma/sh/Kconfig +++ b/drivers/dma/sh/Kconfig @@ -39,18 +39,6 @@ config SH_DMAE_R8A73A4 endif -config SUDMAC - tristate "Renesas SUDMAC support" - depends on SH_DMAE_BASE - help - Enable support for the Renesas SUDMAC controllers. - -config RCAR_HPB_DMAE - tristate "Renesas R-Car HPB DMAC support" - depends on SH_DMAE_BASE - help - Enable support for the Renesas R-Car series DMA controllers. - config RCAR_DMAC tristate "Renesas R-Car Gen2 DMA Controller" depends on ARCH_SHMOBILE || COMPILE_TEST @@ -59,6 +47,12 @@ config RCAR_DMAC This driver supports the general purpose DMA controller found in the Renesas R-Car second generation SoCs. +config RCAR_HPB_DMAE + tristate "Renesas R-Car HPB DMAC support" + depends on SH_DMAE_BASE + help + Enable support for the Renesas R-Car series DMA controllers. + config RENESAS_USB_DMAC tristate "Renesas USB-DMA Controller" depends on ARCH_SHMOBILE || COMPILE_TEST @@ -67,3 +61,9 @@ config RENESAS_USB_DMAC help This driver supports the USB-DMA controller found in the Renesas SoCs. + +config SUDMAC + tristate "Renesas SUDMAC support" + depends on SH_DMAE_BASE + help + Enable support for the Renesas SUDMAC controllers. diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile index b8a598066ce2..0133e4658196 100644 --- a/drivers/dma/sh/Makefile +++ b/drivers/dma/sh/Makefile @@ -13,7 +13,7 @@ shdma-$(CONFIG_SH_DMAE_R8A73A4) += shdma-r8a73a4.o shdma-objs := $(shdma-y) obj-$(CONFIG_SH_DMAE) += shdma.o -obj-$(CONFIG_SUDMAC) += sudmac.o -obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o obj-$(CONFIG_RCAR_DMAC) += rcar-dmac.o +obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o obj-$(CONFIG_RENESAS_USB_DMAC) += usb-dmac.o +obj-$(CONFIG_SUDMAC) += sudmac.o diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c index 8c5186cc9f63..7d5598d874e1 100644 --- a/drivers/dma/sirf-dma.c +++ b/drivers/dma/sirf-dma.c @@ -455,6 +455,7 @@ static int sirfsoc_dma_terminate_all(struct dma_chan *chan) switch (sdma->type) { case SIRFSOC_DMA_VER_A7V1: writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_INT_EN_CLR); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_INT); writel_relaxed((1 << cid) | 1 << (cid + 16), sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7); @@ -462,6 +463,8 @@ static int sirfsoc_dma_terminate_all(struct dma_chan *chan) break; case SIRFSOC_DMA_VER_A7V2: writel_relaxed(0, sdma->base + SIRFSOC_DMA_INT_EN_ATLAS7); + writel_relaxed(SIRFSOC_DMA_INT_ALL_ATLAS7, + sdma->base + SIRFSOC_DMA_INT_ATLAS7); writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); writel_relaxed(0, sdma->base + SIRFSOC_DMA_VALID_ATLAS7); break; diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 3c10f034d4b9..750d1b313684 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -2853,7 +2853,7 @@ static void d40_ops_init(struct d40_base *base, struct dma_device *dev) * This controller can only access address at even * 32bit boundaries, i.e. 2^2 */ - dev->copy_align = 2; + dev->copy_align = DMAENGINE_ALIGN_4_BYTES; } if (dma_has_cap(DMA_SG, dev->cap_mask)) diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c new file mode 100644 index 000000000000..a1a500d96ff2 --- /dev/null +++ b/drivers/dma/sun4i-dma.c @@ -0,0 +1,1288 @@ +/* + * Copyright (C) 2014 Emilio López + * Emilio López <emilio@elopez.com.ar> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/dmaengine.h> +#include <linux/dmapool.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of_dma.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +#include "virt-dma.h" + +/** Common macros to normal and dedicated DMA registers **/ + +#define SUN4I_DMA_CFG_LOADING BIT(31) +#define SUN4I_DMA_CFG_DST_DATA_WIDTH(width) ((width) << 25) +#define SUN4I_DMA_CFG_DST_BURST_LENGTH(len) ((len) << 23) +#define SUN4I_DMA_CFG_DST_ADDR_MODE(mode) ((mode) << 21) +#define SUN4I_DMA_CFG_DST_DRQ_TYPE(type) ((type) << 16) +#define SUN4I_DMA_CFG_SRC_DATA_WIDTH(width) ((width) << 9) +#define SUN4I_DMA_CFG_SRC_BURST_LENGTH(len) ((len) << 7) +#define SUN4I_DMA_CFG_SRC_ADDR_MODE(mode) ((mode) << 5) +#define SUN4I_DMA_CFG_SRC_DRQ_TYPE(type) (type) + +/** Normal DMA register values **/ + +/* Normal DMA source/destination data request type values */ +#define SUN4I_NDMA_DRQ_TYPE_SDRAM 0x16 +#define SUN4I_NDMA_DRQ_TYPE_LIMIT (0x1F + 1) + +/** Normal DMA register layout **/ + +/* Dedicated DMA source/destination address mode values */ +#define SUN4I_NDMA_ADDR_MODE_LINEAR 0 +#define SUN4I_NDMA_ADDR_MODE_IO 1 + +/* Normal DMA configuration register layout */ +#define SUN4I_NDMA_CFG_CONT_MODE BIT(30) +#define SUN4I_NDMA_CFG_WAIT_STATE(n) ((n) << 27) +#define SUN4I_NDMA_CFG_DST_NON_SECURE BIT(22) +#define SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN BIT(15) +#define SUN4I_NDMA_CFG_SRC_NON_SECURE BIT(6) + +/** Dedicated DMA register values **/ + +/* Dedicated DMA source/destination address mode values */ +#define SUN4I_DDMA_ADDR_MODE_LINEAR 0 +#define SUN4I_DDMA_ADDR_MODE_IO 1 +#define SUN4I_DDMA_ADDR_MODE_HORIZONTAL_PAGE 2 +#define SUN4I_DDMA_ADDR_MODE_VERTICAL_PAGE 3 + +/* Dedicated DMA source/destination data request type values */ +#define SUN4I_DDMA_DRQ_TYPE_SDRAM 0x1 +#define SUN4I_DDMA_DRQ_TYPE_LIMIT (0x1F + 1) + +/** Dedicated DMA register layout **/ + +/* Dedicated DMA configuration register layout */ +#define SUN4I_DDMA_CFG_BUSY BIT(30) +#define SUN4I_DDMA_CFG_CONT_MODE BIT(29) +#define SUN4I_DDMA_CFG_DST_NON_SECURE BIT(28) +#define SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN BIT(15) +#define SUN4I_DDMA_CFG_SRC_NON_SECURE BIT(12) + +/* Dedicated DMA parameter register layout */ +#define SUN4I_DDMA_PARA_DST_DATA_BLK_SIZE(n) (((n) - 1) << 24) +#define SUN4I_DDMA_PARA_DST_WAIT_CYCLES(n) (((n) - 1) << 16) +#define SUN4I_DDMA_PARA_SRC_DATA_BLK_SIZE(n) (((n) - 1) << 8) +#define SUN4I_DDMA_PARA_SRC_WAIT_CYCLES(n) (((n) - 1) << 0) + +/** DMA register offsets **/ + +/* General register offsets */ +#define SUN4I_DMA_IRQ_ENABLE_REG 0x0 +#define SUN4I_DMA_IRQ_PENDING_STATUS_REG 0x4 + +/* Normal DMA register offsets */ +#define SUN4I_NDMA_CHANNEL_REG_BASE(n) (0x100 + (n) * 0x20) +#define SUN4I_NDMA_CFG_REG 0x0 +#define SUN4I_NDMA_SRC_ADDR_REG 0x4 +#define SUN4I_NDMA_DST_ADDR_REG 0x8 +#define SUN4I_NDMA_BYTE_COUNT_REG 0xC + +/* Dedicated DMA register offsets */ +#define SUN4I_DDMA_CHANNEL_REG_BASE(n) (0x300 + (n) * 0x20) +#define SUN4I_DDMA_CFG_REG 0x0 +#define SUN4I_DDMA_SRC_ADDR_REG 0x4 +#define SUN4I_DDMA_DST_ADDR_REG 0x8 +#define SUN4I_DDMA_BYTE_COUNT_REG 0xC +#define SUN4I_DDMA_PARA_REG 0x18 + +/** DMA Driver **/ + +/* + * Normal DMA has 8 channels, and Dedicated DMA has another 8, so + * that's 16 channels. As for endpoints, there's 29 and 21 + * respectively. Given that the Normal DMA endpoints (other than + * SDRAM) can be used as tx/rx, we need 78 vchans in total + */ +#define SUN4I_NDMA_NR_MAX_CHANNELS 8 +#define SUN4I_DDMA_NR_MAX_CHANNELS 8 +#define SUN4I_DMA_NR_MAX_CHANNELS \ + (SUN4I_NDMA_NR_MAX_CHANNELS + SUN4I_DDMA_NR_MAX_CHANNELS) +#define SUN4I_NDMA_NR_MAX_VCHANS (29 * 2 - 1) +#define SUN4I_DDMA_NR_MAX_VCHANS 21 +#define SUN4I_DMA_NR_MAX_VCHANS \ + (SUN4I_NDMA_NR_MAX_VCHANS + SUN4I_DDMA_NR_MAX_VCHANS) + +/* This set of SUN4I_DDMA timing parameters were found experimentally while + * working with the SPI driver and seem to make it behave correctly */ +#define SUN4I_DDMA_MAGIC_SPI_PARAMETERS \ + (SUN4I_DDMA_PARA_DST_DATA_BLK_SIZE(1) | \ + SUN4I_DDMA_PARA_SRC_DATA_BLK_SIZE(1) | \ + SUN4I_DDMA_PARA_DST_WAIT_CYCLES(2) | \ + SUN4I_DDMA_PARA_SRC_WAIT_CYCLES(2)) + +struct sun4i_dma_pchan { + /* Register base of channel */ + void __iomem *base; + /* vchan currently being serviced */ + struct sun4i_dma_vchan *vchan; + /* Is this a dedicated pchan? */ + int is_dedicated; +}; + +struct sun4i_dma_vchan { + struct virt_dma_chan vc; + struct dma_slave_config cfg; + struct sun4i_dma_pchan *pchan; + struct sun4i_dma_promise *processing; + struct sun4i_dma_contract *contract; + u8 endpoint; + int is_dedicated; +}; + +struct sun4i_dma_promise { + u32 cfg; + u32 para; + dma_addr_t src; + dma_addr_t dst; + size_t len; + struct list_head list; +}; + +/* A contract is a set of promises */ +struct sun4i_dma_contract { + struct virt_dma_desc vd; + struct list_head demands; + struct list_head completed_demands; + int is_cyclic; +}; + +struct sun4i_dma_dev { + DECLARE_BITMAP(pchans_used, SUN4I_DMA_NR_MAX_CHANNELS); + struct dma_device slave; + struct sun4i_dma_pchan *pchans; + struct sun4i_dma_vchan *vchans; + void __iomem *base; + struct clk *clk; + int irq; + spinlock_t lock; +}; + +static struct sun4i_dma_dev *to_sun4i_dma_dev(struct dma_device *dev) +{ + return container_of(dev, struct sun4i_dma_dev, slave); +} + +static struct sun4i_dma_vchan *to_sun4i_dma_vchan(struct dma_chan *chan) +{ + return container_of(chan, struct sun4i_dma_vchan, vc.chan); +} + +static struct sun4i_dma_contract *to_sun4i_dma_contract(struct virt_dma_desc *vd) +{ + return container_of(vd, struct sun4i_dma_contract, vd); +} + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static int convert_burst(u32 maxburst) +{ + if (maxburst > 8) + return -EINVAL; + + /* 1 -> 0, 4 -> 1, 8 -> 2 */ + return (maxburst >> 2); +} + +static int convert_buswidth(enum dma_slave_buswidth addr_width) +{ + if (addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES) + return -EINVAL; + + /* 8 (1 byte) -> 0, 16 (2 bytes) -> 1, 32 (4 bytes) -> 2 */ + return (addr_width >> 1); +} + +static void sun4i_dma_free_chan_resources(struct dma_chan *chan) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + + vchan_free_chan_resources(&vchan->vc); +} + +static struct sun4i_dma_pchan *find_and_use_pchan(struct sun4i_dma_dev *priv, + struct sun4i_dma_vchan *vchan) +{ + struct sun4i_dma_pchan *pchan = NULL, *pchans = priv->pchans; + unsigned long flags; + int i, max; + + /* + * pchans 0-SUN4I_NDMA_NR_MAX_CHANNELS are normal, and + * SUN4I_NDMA_NR_MAX_CHANNELS+ are dedicated ones + */ + if (vchan->is_dedicated) { + i = SUN4I_NDMA_NR_MAX_CHANNELS; + max = SUN4I_DMA_NR_MAX_CHANNELS; + } else { + i = 0; + max = SUN4I_NDMA_NR_MAX_CHANNELS; + } + + spin_lock_irqsave(&priv->lock, flags); + for_each_clear_bit_from(i, &priv->pchans_used, max) { + pchan = &pchans[i]; + pchan->vchan = vchan; + set_bit(i, priv->pchans_used); + break; + } + spin_unlock_irqrestore(&priv->lock, flags); + + return pchan; +} + +static void release_pchan(struct sun4i_dma_dev *priv, + struct sun4i_dma_pchan *pchan) +{ + unsigned long flags; + int nr = pchan - priv->pchans; + + spin_lock_irqsave(&priv->lock, flags); + + pchan->vchan = NULL; + clear_bit(nr, priv->pchans_used); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static void configure_pchan(struct sun4i_dma_pchan *pchan, + struct sun4i_dma_promise *d) +{ + /* + * Configure addresses and misc parameters depending on type + * SUN4I_DDMA has an extra field with timing parameters + */ + if (pchan->is_dedicated) { + writel_relaxed(d->src, pchan->base + SUN4I_DDMA_SRC_ADDR_REG); + writel_relaxed(d->dst, pchan->base + SUN4I_DDMA_DST_ADDR_REG); + writel_relaxed(d->len, pchan->base + SUN4I_DDMA_BYTE_COUNT_REG); + writel_relaxed(d->para, pchan->base + SUN4I_DDMA_PARA_REG); + writel_relaxed(d->cfg, pchan->base + SUN4I_DDMA_CFG_REG); + } else { + writel_relaxed(d->src, pchan->base + SUN4I_NDMA_SRC_ADDR_REG); + writel_relaxed(d->dst, pchan->base + SUN4I_NDMA_DST_ADDR_REG); + writel_relaxed(d->len, pchan->base + SUN4I_NDMA_BYTE_COUNT_REG); + writel_relaxed(d->cfg, pchan->base + SUN4I_NDMA_CFG_REG); + } +} + +static void set_pchan_interrupt(struct sun4i_dma_dev *priv, + struct sun4i_dma_pchan *pchan, + int half, int end) +{ + u32 reg; + int pchan_number = pchan - priv->pchans; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + reg = readl_relaxed(priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + + if (half) + reg |= BIT(pchan_number * 2); + else + reg &= ~BIT(pchan_number * 2); + + if (end) + reg |= BIT(pchan_number * 2 + 1); + else + reg &= ~BIT(pchan_number * 2 + 1); + + writel_relaxed(reg, priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +/** + * Execute pending operations on a vchan + * + * When given a vchan, this function will try to acquire a suitable + * pchan and, if successful, will configure it to fulfill a promise + * from the next pending contract. + * + * This function must be called with &vchan->vc.lock held. + */ +static int __execute_vchan_pending(struct sun4i_dma_dev *priv, + struct sun4i_dma_vchan *vchan) +{ + struct sun4i_dma_promise *promise = NULL; + struct sun4i_dma_contract *contract = NULL; + struct sun4i_dma_pchan *pchan; + struct virt_dma_desc *vd; + int ret; + + lockdep_assert_held(&vchan->vc.lock); + + /* We need a pchan to do anything, so secure one if available */ + pchan = find_and_use_pchan(priv, vchan); + if (!pchan) + return -EBUSY; + + /* + * Channel endpoints must not be repeated, so if this vchan + * has already submitted some work, we can't do anything else + */ + if (vchan->processing) { + dev_dbg(chan2dev(&vchan->vc.chan), + "processing something to this endpoint already\n"); + ret = -EBUSY; + goto release_pchan; + } + + do { + /* Figure out which contract we're working with today */ + vd = vchan_next_desc(&vchan->vc); + if (!vd) { + dev_dbg(chan2dev(&vchan->vc.chan), + "No pending contract found"); + ret = 0; + goto release_pchan; + } + + contract = to_sun4i_dma_contract(vd); + if (list_empty(&contract->demands)) { + /* The contract has been completed so mark it as such */ + list_del(&contract->vd.node); + vchan_cookie_complete(&contract->vd); + dev_dbg(chan2dev(&vchan->vc.chan), + "Empty contract found and marked complete"); + } + } while (list_empty(&contract->demands)); + + /* Now find out what we need to do */ + promise = list_first_entry(&contract->demands, + struct sun4i_dma_promise, list); + vchan->processing = promise; + + /* ... and make it reality */ + if (promise) { + vchan->contract = contract; + vchan->pchan = pchan; + set_pchan_interrupt(priv, pchan, contract->is_cyclic, 1); + configure_pchan(pchan, promise); + } + + return 0; + +release_pchan: + release_pchan(priv, pchan); + return ret; +} + +static int sanitize_config(struct dma_slave_config *sconfig, + enum dma_transfer_direction direction) +{ + switch (direction) { + case DMA_MEM_TO_DEV: + if ((sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) || + !sconfig->dst_maxburst) + return -EINVAL; + + if (sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + sconfig->src_addr_width = sconfig->dst_addr_width; + + if (!sconfig->src_maxburst) + sconfig->src_maxburst = sconfig->dst_maxburst; + + break; + + case DMA_DEV_TO_MEM: + if ((sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) || + !sconfig->src_maxburst) + return -EINVAL; + + if (sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + sconfig->dst_addr_width = sconfig->src_addr_width; + + if (!sconfig->dst_maxburst) + sconfig->dst_maxburst = sconfig->src_maxburst; + + break; + default: + return 0; + } + + return 0; +} + +/** + * Generate a promise, to be used in a normal DMA contract. + * + * A NDMA promise contains all the information required to program the + * normal part of the DMA Engine and get data copied. A non-executed + * promise will live in the demands list on a contract. Once it has been + * completed, it will be moved to the completed demands list for later freeing. + * All linked promises will be freed when the corresponding contract is freed + */ +static struct sun4i_dma_promise * +generate_ndma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, + size_t len, struct dma_slave_config *sconfig, + enum dma_transfer_direction direction) +{ + struct sun4i_dma_promise *promise; + int ret; + + ret = sanitize_config(sconfig, direction); + if (ret) + return NULL; + + promise = kzalloc(sizeof(*promise), GFP_NOWAIT); + if (!promise) + return NULL; + + promise->src = src; + promise->dst = dest; + promise->len = len; + promise->cfg = SUN4I_DMA_CFG_LOADING | + SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN; + + dev_dbg(chan2dev(chan), + "src burst %d, dst burst %d, src buswidth %d, dst buswidth %d", + sconfig->src_maxburst, sconfig->dst_maxburst, + sconfig->src_addr_width, sconfig->dst_addr_width); + + /* Source burst */ + ret = convert_burst(sconfig->src_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret); + + /* Destination burst */ + ret = convert_burst(sconfig->dst_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret); + + /* Source bus width */ + ret = convert_buswidth(sconfig->src_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret); + + /* Destination bus width */ + ret = convert_buswidth(sconfig->dst_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret); + + return promise; + +fail: + kfree(promise); + return NULL; +} + +/** + * Generate a promise, to be used in a dedicated DMA contract. + * + * A DDMA promise contains all the information required to program the + * Dedicated part of the DMA Engine and get data copied. A non-executed + * promise will live in the demands list on a contract. Once it has been + * completed, it will be moved to the completed demands list for later freeing. + * All linked promises will be freed when the corresponding contract is freed + */ +static struct sun4i_dma_promise * +generate_ddma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, + size_t len, struct dma_slave_config *sconfig) +{ + struct sun4i_dma_promise *promise; + int ret; + + promise = kzalloc(sizeof(*promise), GFP_NOWAIT); + if (!promise) + return NULL; + + promise->src = src; + promise->dst = dest; + promise->len = len; + promise->cfg = SUN4I_DMA_CFG_LOADING | + SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN; + + /* Source burst */ + ret = convert_burst(sconfig->src_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret); + + /* Destination burst */ + ret = convert_burst(sconfig->dst_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret); + + /* Source bus width */ + ret = convert_buswidth(sconfig->src_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret); + + /* Destination bus width */ + ret = convert_buswidth(sconfig->dst_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret); + + return promise; + +fail: + kfree(promise); + return NULL; +} + +/** + * Generate a contract + * + * Contracts function as DMA descriptors. As our hardware does not support + * linked lists, we need to implement SG via software. We use a contract + * to hold all the pieces of the request and process them serially one + * after another. Each piece is represented as a promise. + */ +static struct sun4i_dma_contract *generate_dma_contract(void) +{ + struct sun4i_dma_contract *contract; + + contract = kzalloc(sizeof(*contract), GFP_NOWAIT); + if (!contract) + return NULL; + + INIT_LIST_HEAD(&contract->demands); + INIT_LIST_HEAD(&contract->completed_demands); + + return contract; +} + +/** + * Get next promise on a cyclic transfer + * + * Cyclic contracts contain a series of promises which are executed on a + * loop. This function returns the next promise from a cyclic contract, + * so it can be programmed into the hardware. + */ +static struct sun4i_dma_promise * +get_next_cyclic_promise(struct sun4i_dma_contract *contract) +{ + struct sun4i_dma_promise *promise; + + promise = list_first_entry_or_null(&contract->demands, + struct sun4i_dma_promise, list); + if (!promise) { + list_splice_init(&contract->completed_demands, + &contract->demands); + promise = list_first_entry(&contract->demands, + struct sun4i_dma_promise, list); + } + + return promise; +} + +/** + * Free a contract and all its associated promises + */ +static void sun4i_dma_free_contract(struct virt_dma_desc *vd) +{ + struct sun4i_dma_contract *contract = to_sun4i_dma_contract(vd); + struct sun4i_dma_promise *promise; + + /* Free all the demands and completed demands */ + list_for_each_entry(promise, &contract->demands, list) + kfree(promise); + + list_for_each_entry(promise, &contract->completed_demands, list) + kfree(promise); + + kfree(contract); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + /* + * We can only do the copy to bus aligned addresses, so + * choose the best one so we get decent performance. We also + * maximize the burst size for this same reason. + */ + sconfig->src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + sconfig->dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + sconfig->src_maxburst = 8; + sconfig->dst_maxburst = 8; + + if (vchan->is_dedicated) + promise = generate_ddma_promise(chan, src, dest, len, sconfig); + else + promise = generate_ndma_promise(chan, src, dest, len, sconfig, + DMA_MEM_TO_MEM); + + if (!promise) { + kfree(contract); + return NULL; + } + + /* Configure memcpy mode */ + if (vchan->is_dedicated) { + promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM); + } else { + promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM); + } + + /* Fill the contract with our only promise */ + list_add_tail(&promise->list, &contract->demands); + + /* And add it to the vchan */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf, size_t len, + size_t period_len, enum dma_transfer_direction dir, + unsigned long flags) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + dma_addr_t src, dest; + u32 endpoints; + int nr_periods, offset, plength, i; + + if (!is_slave_direction(dir)) { + dev_err(chan2dev(chan), "Invalid DMA direction\n"); + return NULL; + } + + if (vchan->is_dedicated) { + /* + * As we are using this just for audio data, we need to use + * normal DMA. There is nothing stopping us from supporting + * dedicated DMA here as well, so if a client comes up and + * requires it, it will be simple to implement it. + */ + dev_err(chan2dev(chan), + "Cyclic transfers are only supported on Normal DMA\n"); + return NULL; + } + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + contract->is_cyclic = 1; + + /* Figure out the endpoints and the address we need */ + if (dir == DMA_MEM_TO_DEV) { + src = buf; + dest = sconfig->dst_addr; + endpoints = SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_DST_ADDR_MODE(SUN4I_NDMA_ADDR_MODE_IO); + } else { + src = sconfig->src_addr; + dest = buf; + endpoints = SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(SUN4I_NDMA_ADDR_MODE_IO) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM); + } + + /* + * We will be using half done interrupts to make two periods + * out of a promise, so we need to program the DMA engine less + * often + */ + + /* + * The engine can interrupt on half-transfer, so we can use + * this feature to program the engine half as often as if we + * didn't use it (keep in mind the hardware doesn't support + * linked lists). + * + * Say you have a set of periods (| marks the start/end, I for + * interrupt, P for programming the engine to do a new + * transfer), the easy but slow way would be to do + * + * |---|---|---|---| (periods / promises) + * P I,P I,P I,P I + * + * Using half transfer interrupts you can do + * + * |-------|-------| (promises as configured on hw) + * |---|---|---|---| (periods) + * P I I,P I I + * + * Which requires half the engine programming for the same + * functionality. + */ + nr_periods = DIV_ROUND_UP(len / period_len, 2); + for (i = 0; i < nr_periods; i++) { + /* Calculate the offset in the buffer and the length needed */ + offset = i * period_len * 2; + plength = min((len - offset), (period_len * 2)); + if (dir == DMA_MEM_TO_DEV) + src = buf + offset; + else + dest = buf + offset; + + /* Make the promise */ + promise = generate_ndma_promise(chan, src, dest, + plength, sconfig, dir); + if (!promise) { + /* TODO: should we free everything? */ + return NULL; + } + promise->cfg |= endpoints; + + /* Then add it to the contract */ + list_add_tail(&promise->list, &contract->demands); + } + + /* And add it to the vchan */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + u8 ram_type, io_mode, linear_mode; + struct scatterlist *sg; + dma_addr_t srcaddr, dstaddr; + u32 endpoints, para; + int i; + + if (!sgl) + return NULL; + + if (!is_slave_direction(dir)) { + dev_err(chan2dev(chan), "Invalid DMA direction\n"); + return NULL; + } + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + if (vchan->is_dedicated) { + io_mode = SUN4I_DDMA_ADDR_MODE_IO; + linear_mode = SUN4I_DDMA_ADDR_MODE_LINEAR; + ram_type = SUN4I_DDMA_DRQ_TYPE_SDRAM; + } else { + io_mode = SUN4I_NDMA_ADDR_MODE_IO; + linear_mode = SUN4I_NDMA_ADDR_MODE_LINEAR; + ram_type = SUN4I_NDMA_DRQ_TYPE_SDRAM; + } + + if (dir == DMA_MEM_TO_DEV) + endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_DST_ADDR_MODE(io_mode) | + SUN4I_DMA_CFG_SRC_DRQ_TYPE(ram_type) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(linear_mode); + else + endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(ram_type) | + SUN4I_DMA_CFG_DST_ADDR_MODE(linear_mode) | + SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(io_mode); + + for_each_sg(sgl, sg, sg_len, i) { + /* Figure out addresses */ + if (dir == DMA_MEM_TO_DEV) { + srcaddr = sg_dma_address(sg); + dstaddr = sconfig->dst_addr; + } else { + srcaddr = sconfig->src_addr; + dstaddr = sg_dma_address(sg); + } + + /* + * These are the magic DMA engine timings that keep SPI going. + * I haven't seen any interface on DMAEngine to configure + * timings, and so far they seem to work for everything we + * support, so I've kept them here. I don't know if other + * devices need different timings because, as usual, we only + * have the "para" bitfield meanings, but no comment on what + * the values should be when doing a certain operation :| + */ + para = SUN4I_DDMA_MAGIC_SPI_PARAMETERS; + + /* And make a suitable promise */ + if (vchan->is_dedicated) + promise = generate_ddma_promise(chan, srcaddr, dstaddr, + sg_dma_len(sg), + sconfig); + else + promise = generate_ndma_promise(chan, srcaddr, dstaddr, + sg_dma_len(sg), + sconfig, dir); + + if (!promise) + return NULL; /* TODO: should we free everything? */ + + promise->cfg |= endpoints; + promise->para = para; + + /* Then add it to the contract */ + list_add_tail(&promise->list, &contract->demands); + } + + /* + * Once we've got all the promises ready, add the contract + * to the pending list on the vchan + */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static int sun4i_dma_terminate_all(struct dma_chan *chan) +{ + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct sun4i_dma_pchan *pchan = vchan->pchan; + LIST_HEAD(head); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + vchan_get_all_descriptors(&vchan->vc, &head); + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + /* + * Clearing the configuration register will halt the pchan. Interrupts + * may still trigger, so don't forget to disable them. + */ + if (pchan) { + if (pchan->is_dedicated) + writel(0, pchan->base + SUN4I_DDMA_CFG_REG); + else + writel(0, pchan->base + SUN4I_NDMA_CFG_REG); + set_pchan_interrupt(priv, pchan, 0, 0); + release_pchan(priv, pchan); + } + + spin_lock_irqsave(&vchan->vc.lock, flags); + vchan_dma_desc_free_list(&vchan->vc, &head); + /* Clear these so the vchan is usable again */ + vchan->processing = NULL; + vchan->pchan = NULL; + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + return 0; +} + +static int sun4i_dma_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + + memcpy(&vchan->cfg, config, sizeof(*config)); + + return 0; +} + +static struct dma_chan *sun4i_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct sun4i_dma_dev *priv = ofdma->of_dma_data; + struct sun4i_dma_vchan *vchan; + struct dma_chan *chan; + u8 is_dedicated = dma_spec->args[0]; + u8 endpoint = dma_spec->args[1]; + + /* Check if type is Normal or Dedicated */ + if (is_dedicated != 0 && is_dedicated != 1) + return NULL; + + /* Make sure the endpoint looks sane */ + if ((is_dedicated && endpoint >= SUN4I_DDMA_DRQ_TYPE_LIMIT) || + (!is_dedicated && endpoint >= SUN4I_NDMA_DRQ_TYPE_LIMIT)) + return NULL; + + chan = dma_get_any_slave_channel(&priv->slave); + if (!chan) + return NULL; + + /* Assign the endpoint to the vchan */ + vchan = to_sun4i_dma_vchan(chan); + vchan->is_dedicated = is_dedicated; + vchan->endpoint = endpoint; + + return chan; +} + +static enum dma_status sun4i_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct sun4i_dma_pchan *pchan = vchan->pchan; + struct sun4i_dma_contract *contract; + struct sun4i_dma_promise *promise; + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + size_t bytes = 0; + + ret = dma_cookie_status(chan, cookie, state); + if (!state || (ret == DMA_COMPLETE)) + return ret; + + spin_lock_irqsave(&vchan->vc.lock, flags); + vd = vchan_find_desc(&vchan->vc, cookie); + if (!vd) + goto exit; + contract = to_sun4i_dma_contract(vd); + + list_for_each_entry(promise, &contract->demands, list) + bytes += promise->len; + + /* + * The hardware is configured to return the remaining byte + * quantity. If possible, replace the first listed element's + * full size with the actual remaining amount + */ + promise = list_first_entry_or_null(&contract->demands, + struct sun4i_dma_promise, list); + if (promise && pchan) { + bytes -= promise->len; + if (pchan->is_dedicated) + bytes += readl(pchan->base + SUN4I_DDMA_BYTE_COUNT_REG); + else + bytes += readl(pchan->base + SUN4I_NDMA_BYTE_COUNT_REG); + } + +exit: + + dma_set_residue(state, bytes); + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + return ret; +} + +static void sun4i_dma_issue_pending(struct dma_chan *chan) +{ + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + + /* + * If there are pending transactions for this vchan, push one of + * them into the engine to get the ball rolling. + */ + if (vchan_issue_pending(&vchan->vc)) + __execute_vchan_pending(priv, vchan); + + spin_unlock_irqrestore(&vchan->vc.lock, flags); +} + +static irqreturn_t sun4i_dma_interrupt(int irq, void *dev_id) +{ + struct sun4i_dma_dev *priv = dev_id; + struct sun4i_dma_pchan *pchans = priv->pchans, *pchan; + struct sun4i_dma_vchan *vchan; + struct sun4i_dma_contract *contract; + struct sun4i_dma_promise *promise; + unsigned long pendirq, irqs, disableirqs; + int bit, i, free_room, allow_mitigation = 1; + + pendirq = readl_relaxed(priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + +handle_pending: + + disableirqs = 0; + free_room = 0; + + for_each_set_bit(bit, &pendirq, 32) { + pchan = &pchans[bit >> 1]; + vchan = pchan->vchan; + if (!vchan) /* a terminated channel may still interrupt */ + continue; + contract = vchan->contract; + + /* + * Disable the IRQ and free the pchan if it's an end + * interrupt (odd bit) + */ + if (bit & 1) { + spin_lock(&vchan->vc.lock); + + /* + * Move the promise into the completed list now that + * we're done with it + */ + list_del(&vchan->processing->list); + list_add_tail(&vchan->processing->list, + &contract->completed_demands); + + /* + * Cyclic DMA transfers are special: + * - There's always something we can dispatch + * - We need to run the callback + * - Latency is very important, as this is used by audio + * We therefore just cycle through the list and dispatch + * whatever we have here, reusing the pchan. There's + * no need to run the thread after this. + * + * For non-cyclic transfers we need to look around, + * so we can program some more work, or notify the + * client that their transfers have been completed. + */ + if (contract->is_cyclic) { + promise = get_next_cyclic_promise(contract); + vchan->processing = promise; + configure_pchan(pchan, promise); + vchan_cyclic_callback(&contract->vd); + } else { + vchan->processing = NULL; + vchan->pchan = NULL; + + free_room = 1; + disableirqs |= BIT(bit); + release_pchan(priv, pchan); + } + + spin_unlock(&vchan->vc.lock); + } else { + /* Half done interrupt */ + if (contract->is_cyclic) + vchan_cyclic_callback(&contract->vd); + else + disableirqs |= BIT(bit); + } + } + + /* Disable the IRQs for events we handled */ + spin_lock(&priv->lock); + irqs = readl_relaxed(priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + writel_relaxed(irqs & ~disableirqs, + priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + spin_unlock(&priv->lock); + + /* Writing 1 to the pending field will clear the pending interrupt */ + writel_relaxed(pendirq, priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + + /* + * If a pchan was freed, we may be able to schedule something else, + * so have a look around + */ + if (free_room) { + for (i = 0; i < SUN4I_DMA_NR_MAX_VCHANS; i++) { + vchan = &priv->vchans[i]; + spin_lock(&vchan->vc.lock); + __execute_vchan_pending(priv, vchan); + spin_unlock(&vchan->vc.lock); + } + } + + /* + * Handle newer interrupts if some showed up, but only do it once + * to avoid a too long a loop + */ + if (allow_mitigation) { + pendirq = readl_relaxed(priv->base + + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + if (pendirq) { + allow_mitigation = 0; + goto handle_pending; + } + } + + return IRQ_HANDLED; +} + +static int sun4i_dma_probe(struct platform_device *pdev) +{ + struct sun4i_dma_dev *priv; + struct resource *res; + int i, j, ret; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + priv->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->base)) + return PTR_ERR(priv->base); + + priv->irq = platform_get_irq(pdev, 0); + if (priv->irq < 0) { + dev_err(&pdev->dev, "Cannot claim IRQ\n"); + return priv->irq; + } + + priv->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) { + dev_err(&pdev->dev, "No clock specified\n"); + return PTR_ERR(priv->clk); + } + + platform_set_drvdata(pdev, priv); + spin_lock_init(&priv->lock); + + dma_cap_zero(priv->slave.cap_mask); + dma_cap_set(DMA_PRIVATE, priv->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, priv->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, priv->slave.cap_mask); + dma_cap_set(DMA_SLAVE, priv->slave.cap_mask); + + INIT_LIST_HEAD(&priv->slave.channels); + priv->slave.device_free_chan_resources = sun4i_dma_free_chan_resources; + priv->slave.device_tx_status = sun4i_dma_tx_status; + priv->slave.device_issue_pending = sun4i_dma_issue_pending; + priv->slave.device_prep_slave_sg = sun4i_dma_prep_slave_sg; + priv->slave.device_prep_dma_memcpy = sun4i_dma_prep_dma_memcpy; + priv->slave.device_prep_dma_cyclic = sun4i_dma_prep_dma_cyclic; + priv->slave.device_config = sun4i_dma_config; + priv->slave.device_terminate_all = sun4i_dma_terminate_all; + priv->slave.copy_align = 2; + priv->slave.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + priv->slave.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + priv->slave.directions = BIT(DMA_DEV_TO_MEM) | + BIT(DMA_MEM_TO_DEV); + priv->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + priv->slave.dev = &pdev->dev; + + priv->pchans = devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_CHANNELS, + sizeof(struct sun4i_dma_pchan), GFP_KERNEL); + priv->vchans = devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_VCHANS, + sizeof(struct sun4i_dma_vchan), GFP_KERNEL); + if (!priv->vchans || !priv->pchans) + return -ENOMEM; + + /* + * [0..SUN4I_NDMA_NR_MAX_CHANNELS) are normal pchans, and + * [SUN4I_NDMA_NR_MAX_CHANNELS..SUN4I_DMA_NR_MAX_CHANNELS) are + * dedicated ones + */ + for (i = 0; i < SUN4I_NDMA_NR_MAX_CHANNELS; i++) + priv->pchans[i].base = priv->base + + SUN4I_NDMA_CHANNEL_REG_BASE(i); + + for (j = 0; i < SUN4I_DMA_NR_MAX_CHANNELS; i++, j++) { + priv->pchans[i].base = priv->base + + SUN4I_DDMA_CHANNEL_REG_BASE(j); + priv->pchans[i].is_dedicated = 1; + } + + for (i = 0; i < SUN4I_DMA_NR_MAX_VCHANS; i++) { + struct sun4i_dma_vchan *vchan = &priv->vchans[i]; + + spin_lock_init(&vchan->vc.lock); + vchan->vc.desc_free = sun4i_dma_free_contract; + vchan_init(&vchan->vc, &priv->slave); + } + + ret = clk_prepare_enable(priv->clk); + if (ret) { + dev_err(&pdev->dev, "Couldn't enable the clock\n"); + return ret; + } + + /* + * Make sure the IRQs are all disabled and accounted for. The bootloader + * likes to leave these dirty + */ + writel(0, priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + writel(0xFFFFFFFF, priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + + ret = devm_request_irq(&pdev->dev, priv->irq, sun4i_dma_interrupt, + 0, dev_name(&pdev->dev), priv); + if (ret) { + dev_err(&pdev->dev, "Cannot request IRQ\n"); + goto err_clk_disable; + } + + ret = dma_async_device_register(&priv->slave); + if (ret) { + dev_warn(&pdev->dev, "Failed to register DMA engine device\n"); + goto err_clk_disable; + } + + ret = of_dma_controller_register(pdev->dev.of_node, sun4i_dma_of_xlate, + priv); + if (ret) { + dev_err(&pdev->dev, "of_dma_controller_register failed\n"); + goto err_dma_unregister; + } + + dev_dbg(&pdev->dev, "Successfully probed SUN4I_DMA\n"); + + return 0; + +err_dma_unregister: + dma_async_device_unregister(&priv->slave); +err_clk_disable: + clk_disable_unprepare(priv->clk); + return ret; +} + +static int sun4i_dma_remove(struct platform_device *pdev) +{ + struct sun4i_dma_dev *priv = platform_get_drvdata(pdev); + + /* Disable IRQ so no more work is scheduled */ + disable_irq(priv->irq); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&priv->slave); + + clk_disable_unprepare(priv->clk); + + return 0; +} + +static const struct of_device_id sun4i_dma_match[] = { + { .compatible = "allwinner,sun4i-a10-dma" }, + { /* sentinel */ }, +}; + +static struct platform_driver sun4i_dma_driver = { + .probe = sun4i_dma_probe, + .remove = sun4i_dma_remove, + .driver = { + .name = "sun4i-dma", + .of_match_table = sun4i_dma_match, + }, +}; + +module_platform_driver(sun4i_dma_driver); + +MODULE_DESCRIPTION("Allwinner A10 Dedicated DMA Controller Driver"); +MODULE_AUTHOR("Emilio López <emilio@elopez.com.ar>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 842ff97c2cfb..73e0be6e2100 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -969,7 +969,7 @@ static int sun6i_dma_probe(struct platform_device *pdev) sdc->slave.device_issue_pending = sun6i_dma_issue_pending; sdc->slave.device_prep_slave_sg = sun6i_dma_prep_slave_sg; sdc->slave.device_prep_dma_memcpy = sun6i_dma_prep_dma_memcpy; - sdc->slave.copy_align = 4; + sdc->slave.copy_align = DMAENGINE_ALIGN_4_BYTES; sdc->slave.device_config = sun6i_dma_config; sdc->slave.device_pause = sun6i_dma_pause; sdc->slave.device_resume = sun6i_dma_resume; diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index eaf585e8286b..c8f79dcaaee8 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -155,7 +155,6 @@ struct tegra_dma_sg_req { int req_len; bool configured; bool last_sg; - bool half_done; struct list_head node; struct tegra_dma_desc *dma_desc; }; @@ -188,7 +187,7 @@ struct tegra_dma_channel { bool config_init; int id; int irq; - unsigned long chan_base_offset; + void __iomem *chan_addr; spinlock_t lock; bool busy; struct tegra_dma *tdma; @@ -203,8 +202,6 @@ struct tegra_dma_channel { /* ISR handler and tasklet for bottom half of isr handling */ dma_isr_handler isr_handler; struct tasklet_struct tasklet; - dma_async_tx_callback callback; - void *callback_param; /* Channel-slave specific configuration */ unsigned int slave_id; @@ -222,6 +219,13 @@ struct tegra_dma { void __iomem *base_addr; const struct tegra_dma_chip_data *chip_data; + /* + * Counter for managing global pausing of the DMA controller. + * Only applicable for devices that don't support individual + * channel pausing. + */ + u32 global_pause_count; + /* Some register need to be cache before suspend */ u32 reg_gen; @@ -242,12 +246,12 @@ static inline u32 tdma_read(struct tegra_dma *tdma, u32 reg) static inline void tdc_write(struct tegra_dma_channel *tdc, u32 reg, u32 val) { - writel(val, tdc->tdma->base_addr + tdc->chan_base_offset + reg); + writel(val, tdc->chan_addr + reg); } static inline u32 tdc_read(struct tegra_dma_channel *tdc, u32 reg) { - return readl(tdc->tdma->base_addr + tdc->chan_base_offset + reg); + return readl(tdc->chan_addr + reg); } static inline struct tegra_dma_channel *to_tegra_dma_chan(struct dma_chan *dc) @@ -361,16 +365,32 @@ static void tegra_dma_global_pause(struct tegra_dma_channel *tdc, struct tegra_dma *tdma = tdc->tdma; spin_lock(&tdma->global_lock); - tdma_write(tdma, TEGRA_APBDMA_GENERAL, 0); - if (wait_for_burst_complete) - udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME); + + if (tdc->tdma->global_pause_count == 0) { + tdma_write(tdma, TEGRA_APBDMA_GENERAL, 0); + if (wait_for_burst_complete) + udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME); + } + + tdc->tdma->global_pause_count++; + + spin_unlock(&tdma->global_lock); } static void tegra_dma_global_resume(struct tegra_dma_channel *tdc) { struct tegra_dma *tdma = tdc->tdma; - tdma_write(tdma, TEGRA_APBDMA_GENERAL, TEGRA_APBDMA_GENERAL_ENABLE); + spin_lock(&tdma->global_lock); + + if (WARN_ON(tdc->tdma->global_pause_count == 0)) + goto out; + + if (--tdc->tdma->global_pause_count == 0) + tdma_write(tdma, TEGRA_APBDMA_GENERAL, + TEGRA_APBDMA_GENERAL_ENABLE); + +out: spin_unlock(&tdma->global_lock); } @@ -601,7 +621,6 @@ static void handle_once_dma_done(struct tegra_dma_channel *tdc, return; tdc_start_head_req(tdc); - return; } static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc, @@ -628,7 +647,6 @@ static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc, if (!st) dma_desc->dma_status = DMA_ERROR; } - return; } static void tegra_dma_tasklet(unsigned long data) @@ -720,7 +738,6 @@ static void tegra_dma_issue_pending(struct dma_chan *dc) } end: spin_unlock_irqrestore(&tdc->lock, flags); - return; } static int tegra_dma_terminate_all(struct dma_chan *dc) @@ -932,7 +949,6 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg( struct tegra_dma_sg_req *sg_req = NULL; u32 burst_size; enum dma_slave_buswidth slave_bw; - int ret; if (!tdc->config_init) { dev_err(tdc2dev(tdc), "dma channel is not configured\n"); @@ -943,9 +959,8 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg( return NULL; } - ret = get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, - &burst_size, &slave_bw); - if (ret < 0) + if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, + &burst_size, &slave_bw) < 0) return NULL; INIT_LIST_HEAD(&req_list); @@ -1048,7 +1063,6 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( dma_addr_t mem = buf_addr; u32 burst_size; enum dma_slave_buswidth slave_bw; - int ret; if (!buf_len || !period_len) { dev_err(tdc2dev(tdc), "Invalid buffer/period len\n"); @@ -1087,12 +1101,10 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( return NULL; } - ret = get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, - &burst_size, &slave_bw); - if (ret < 0) + if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, + &burst_size, &slave_bw) < 0) return NULL; - ahb_seq = TEGRA_APBDMA_AHBSEQ_INTR_ENB; ahb_seq |= TEGRA_APBDMA_AHBSEQ_WRAP_NONE << TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT; @@ -1136,7 +1148,6 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( sg_req->ch_regs.apb_seq = apb_seq; sg_req->ch_regs.ahb_seq = ahb_seq; sg_req->configured = false; - sg_req->half_done = false; sg_req->last_sg = false; sg_req->dma_desc = dma_desc; sg_req->req_len = len; @@ -1377,8 +1388,9 @@ static int tegra_dma_probe(struct platform_device *pdev) for (i = 0; i < cdata->nr_channels; i++) { struct tegra_dma_channel *tdc = &tdma->channels[i]; - tdc->chan_base_offset = TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET + - i * cdata->channel_reg_size; + tdc->chan_addr = tdma->base_addr + + TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET + + (i * cdata->channel_reg_size); res = platform_get_resource(pdev, IORESOURCE_IRQ, i); if (!res) { @@ -1418,6 +1430,7 @@ static int tegra_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask); dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask); + tdma->global_pause_count = 0; tdma->dma_dev.dev = &pdev->dev; tdma->dma_dev.device_alloc_chan_resources = tegra_dma_alloc_chan_resources; diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c index 24f5ca2356bf..5cce8c9d0026 100644 --- a/drivers/dma/ti-dma-crossbar.c +++ b/drivers/dma/ti-dma-crossbar.c @@ -20,16 +20,19 @@ #define TI_XBAR_OUTPUTS 127 #define TI_XBAR_INPUTS 256 -static DEFINE_IDR(map_idr); +#define TI_XBAR_EDMA_OFFSET 0 +#define TI_XBAR_SDMA_OFFSET 1 struct ti_dma_xbar_data { void __iomem *iomem; struct dma_router dmarouter; + struct idr map_idr; u16 safe_val; /* Value to rest the crossbar lines */ u32 xbar_requests; /* number of DMA requests connected to XBAR */ u32 dma_requests; /* number of DMA requests forwarded to DMA */ + u32 dma_offset; }; struct ti_dma_xbar_map { @@ -51,7 +54,7 @@ static void ti_dma_xbar_free(struct device *dev, void *route_data) map->xbar_in, map->xbar_out); ti_dma_xbar_write(xbar->iomem, map->xbar_out, xbar->safe_val); - idr_remove(&map_idr, map->xbar_out); + idr_remove(&xbar->map_idr, map->xbar_out); kfree(map); } @@ -81,12 +84,11 @@ static void *ti_dma_xbar_route_allocate(struct of_phandle_args *dma_spec, return ERR_PTR(-ENOMEM); } - map->xbar_out = idr_alloc(&map_idr, NULL, 0, xbar->dma_requests, + map->xbar_out = idr_alloc(&xbar->map_idr, NULL, 0, xbar->dma_requests, GFP_KERNEL); map->xbar_in = (u16)dma_spec->args[0]; - /* The DMA request is 1 based in sDMA */ - dma_spec->args[0] = map->xbar_out + 1; + dma_spec->args[0] = map->xbar_out + xbar->dma_offset; dev_dbg(&pdev->dev, "Mapping XBAR%u to DMA%d\n", map->xbar_in, map->xbar_out); @@ -96,9 +98,22 @@ static void *ti_dma_xbar_route_allocate(struct of_phandle_args *dma_spec, return map; } +static const struct of_device_id ti_dma_master_match[] = { + { + .compatible = "ti,omap4430-sdma", + .data = (void *)TI_XBAR_SDMA_OFFSET, + }, + { + .compatible = "ti,edma3", + .data = (void *)TI_XBAR_EDMA_OFFSET, + }, + {}, +}; + static int ti_dma_xbar_probe(struct platform_device *pdev) { struct device_node *node = pdev->dev.of_node; + const struct of_device_id *match; struct device_node *dma_node; struct ti_dma_xbar_data *xbar; struct resource *res; @@ -113,12 +128,20 @@ static int ti_dma_xbar_probe(struct platform_device *pdev) if (!xbar) return -ENOMEM; + idr_init(&xbar->map_idr); + dma_node = of_parse_phandle(node, "dma-masters", 0); if (!dma_node) { dev_err(&pdev->dev, "Can't get DMA master node\n"); return -ENODEV; } + match = of_match_node(ti_dma_master_match, dma_node); + if (!match) { + dev_err(&pdev->dev, "DMA master is not supported\n"); + return -EINVAL; + } + if (of_property_read_u32(dma_node, "dma-requests", &xbar->dma_requests)) { dev_info(&pdev->dev, @@ -139,17 +162,15 @@ static int ti_dma_xbar_probe(struct platform_device *pdev) xbar->safe_val = (u16)safe_val; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -ENODEV; - iomem = devm_ioremap_resource(&pdev->dev, res); - if (!iomem) - return -ENOMEM; + if (IS_ERR(iomem)) + return PTR_ERR(iomem); xbar->iomem = iomem; xbar->dmarouter.dev = &pdev->dev; xbar->dmarouter.route_free = ti_dma_xbar_free; + xbar->dma_offset = (u32)match->data; platform_set_drvdata(pdev, xbar); diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c index c4c3d93fdd1b..559cd4073698 100644 --- a/drivers/dma/timb_dma.c +++ b/drivers/dma/timb_dma.c @@ -10,10 +10,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* Supports: diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c index dff22ab01851..b23e8d52d126 100644 --- a/drivers/dma/xgene-dma.c +++ b/drivers/dma/xgene-dma.c @@ -21,6 +21,7 @@ * NOTE: PM support is currently not available. */ +#include <linux/acpi.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/dma-mapping.h> @@ -151,7 +152,6 @@ #define XGENE_DMA_PQ_CHANNEL 1 #define XGENE_DMA_MAX_BYTE_CNT 0x4000 /* 16 KB */ #define XGENE_DMA_MAX_64B_DESC_BYTE_CNT 0x14000 /* 80 KB */ -#define XGENE_DMA_XOR_ALIGNMENT 6 /* 64 Bytes */ #define XGENE_DMA_MAX_XOR_SRC 5 #define XGENE_DMA_16K_BUFFER_LEN_CODE 0x0 #define XGENE_DMA_INVALID_LEN_CODE 0x7800000000000000ULL @@ -764,12 +764,17 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) struct xgene_dma_ring *ring = &chan->rx_ring; struct xgene_dma_desc_sw *desc_sw, *_desc_sw; struct xgene_dma_desc_hw *desc_hw; + struct list_head ld_completed; u8 status; + INIT_LIST_HEAD(&ld_completed); + + spin_lock_bh(&chan->lock); + /* Clean already completed and acked descriptors */ xgene_dma_clean_completed_descriptor(chan); - /* Run the callback for each descriptor, in order */ + /* Move all completed descriptors to ld completed queue, in order */ list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_running, node) { /* Get subsequent hw descriptor from DMA rx ring */ desc_hw = &ring->desc_hw[ring->head]; @@ -812,15 +817,17 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) /* Mark this hw descriptor as processed */ desc_hw->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE); - xgene_dma_run_tx_complete_actions(chan, desc_sw); - - xgene_dma_clean_running_descriptor(chan, desc_sw); - /* * Decrement the pending transaction count * as we have processed one */ chan->pending--; + + /* + * Delete this node from ld running queue and append it to + * ld completed queue for further processing + */ + list_move_tail(&desc_sw->node, &ld_completed); } /* @@ -829,6 +836,14 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) * ahead and free the descriptors below. */ xgene_chan_xfer_ld_pending(chan); + + spin_unlock_bh(&chan->lock); + + /* Run the callback for each descriptor, in order */ + list_for_each_entry_safe(desc_sw, _desc_sw, &ld_completed, node) { + xgene_dma_run_tx_complete_actions(chan, desc_sw); + xgene_dma_clean_running_descriptor(chan, desc_sw); + } } static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan) @@ -877,11 +892,11 @@ static void xgene_dma_free_chan_resources(struct dma_chan *dchan) if (!chan->desc_pool) return; - spin_lock_bh(&chan->lock); - /* Process all running descriptor */ xgene_dma_cleanup_descriptors(chan); + spin_lock_bh(&chan->lock); + /* Clean all link descriptor queues */ xgene_dma_free_desc_list(chan, &chan->ld_pending); xgene_dma_free_desc_list(chan, &chan->ld_running); @@ -1201,15 +1216,11 @@ static void xgene_dma_tasklet_cb(unsigned long data) { struct xgene_dma_chan *chan = (struct xgene_dma_chan *)data; - spin_lock_bh(&chan->lock); - /* Run all cleanup for descriptors which have been completed */ xgene_dma_cleanup_descriptors(chan); /* Re-enable DMA channel IRQ */ enable_irq(chan->rx_irq); - - spin_unlock_bh(&chan->lock); } static irqreturn_t xgene_dma_chan_ring_isr(int irq, void *id) @@ -1741,13 +1752,13 @@ static void xgene_dma_set_caps(struct xgene_dma_chan *chan, if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { dma_dev->device_prep_dma_xor = xgene_dma_prep_xor; dma_dev->max_xor = XGENE_DMA_MAX_XOR_SRC; - dma_dev->xor_align = XGENE_DMA_XOR_ALIGNMENT; + dma_dev->xor_align = DMAENGINE_ALIGN_64_BYTES; } if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { dma_dev->device_prep_dma_pq = xgene_dma_prep_pq; dma_dev->max_pq = XGENE_DMA_MAX_XOR_SRC; - dma_dev->pq_align = XGENE_DMA_XOR_ALIGNMENT; + dma_dev->pq_align = DMAENGINE_ALIGN_64_BYTES; } } @@ -1944,16 +1955,18 @@ static int xgene_dma_probe(struct platform_device *pdev) return ret; pdma->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(pdma->clk)) { + if (IS_ERR(pdma->clk) && !ACPI_COMPANION(&pdev->dev)) { dev_err(&pdev->dev, "Failed to get clk\n"); return PTR_ERR(pdma->clk); } /* Enable clk before accessing registers */ - ret = clk_prepare_enable(pdma->clk); - if (ret) { - dev_err(&pdev->dev, "Failed to enable clk %d\n", ret); - return ret; + if (!IS_ERR(pdma->clk)) { + ret = clk_prepare_enable(pdma->clk); + if (ret) { + dev_err(&pdev->dev, "Failed to enable clk %d\n", ret); + return ret; + } } /* Remove DMA RAM out of shutdown */ @@ -1998,7 +2011,8 @@ err_request_irq: err_dma_mask: err_clk_enable: - clk_disable_unprepare(pdma->clk); + if (!IS_ERR(pdma->clk)) + clk_disable_unprepare(pdma->clk); return ret; } @@ -2022,11 +2036,20 @@ static int xgene_dma_remove(struct platform_device *pdev) xgene_dma_delete_chan_rings(chan); } - clk_disable_unprepare(pdma->clk); + if (!IS_ERR(pdma->clk)) + clk_disable_unprepare(pdma->clk); return 0; } +#ifdef CONFIG_ACPI +static const struct acpi_device_id xgene_dma_acpi_match_ptr[] = { + {"APMC0D43", 0}, + {}, +}; +MODULE_DEVICE_TABLE(acpi, xgene_dma_acpi_match_ptr); +#endif + static const struct of_device_id xgene_dma_of_match_ptr[] = { {.compatible = "apm,xgene-storm-dma",}, {}, @@ -2039,6 +2062,7 @@ static struct platform_driver xgene_dma_driver = { .driver = { .name = "X-Gene-DMA", .of_match_table = xgene_dma_of_match_ptr, + .acpi_match_table = ACPI_PTR(xgene_dma_acpi_match_ptr), }, }; diff --git a/drivers/dma/zx296702_dma.c b/drivers/dma/zx296702_dma.c new file mode 100644 index 000000000000..39915a6b7986 --- /dev/null +++ b/drivers/dma/zx296702_dma.c @@ -0,0 +1,951 @@ +/* + * Copyright 2015 Linaro. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/sched.h> +#include <linux/device.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/of_device.h> +#include <linux/of.h> +#include <linux/clk.h> +#include <linux/of_dma.h> + +#include "virt-dma.h" + +#define DRIVER_NAME "zx-dma" +#define DMA_ALIGN 4 +#define DMA_MAX_SIZE (0x10000 - PAGE_SIZE) +#define LLI_BLOCK_SIZE (4 * PAGE_SIZE) + +#define REG_ZX_SRC_ADDR 0x00 +#define REG_ZX_DST_ADDR 0x04 +#define REG_ZX_TX_X_COUNT 0x08 +#define REG_ZX_TX_ZY_COUNT 0x0c +#define REG_ZX_SRC_ZY_STEP 0x10 +#define REG_ZX_DST_ZY_STEP 0x14 +#define REG_ZX_LLI_ADDR 0x1c +#define REG_ZX_CTRL 0x20 +#define REG_ZX_TC_IRQ 0x800 +#define REG_ZX_SRC_ERR_IRQ 0x804 +#define REG_ZX_DST_ERR_IRQ 0x808 +#define REG_ZX_CFG_ERR_IRQ 0x80c +#define REG_ZX_TC_IRQ_RAW 0x810 +#define REG_ZX_SRC_ERR_IRQ_RAW 0x814 +#define REG_ZX_DST_ERR_IRQ_RAW 0x818 +#define REG_ZX_CFG_ERR_IRQ_RAW 0x81c +#define REG_ZX_STATUS 0x820 +#define REG_ZX_DMA_GRP_PRIO 0x824 +#define REG_ZX_DMA_ARB 0x828 + +#define ZX_FORCE_CLOSE BIT(31) +#define ZX_DST_BURST_WIDTH(x) (((x) & 0x7) << 13) +#define ZX_MAX_BURST_LEN 16 +#define ZX_SRC_BURST_LEN(x) (((x) & 0xf) << 9) +#define ZX_SRC_BURST_WIDTH(x) (((x) & 0x7) << 6) +#define ZX_IRQ_ENABLE_ALL (3 << 4) +#define ZX_DST_FIFO_MODE BIT(3) +#define ZX_SRC_FIFO_MODE BIT(2) +#define ZX_SOFT_REQ BIT(1) +#define ZX_CH_ENABLE BIT(0) + +#define ZX_DMA_BUSWIDTHS \ + (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)) + +enum zx_dma_burst_width { + ZX_DMA_WIDTH_8BIT = 0, + ZX_DMA_WIDTH_16BIT = 1, + ZX_DMA_WIDTH_32BIT = 2, + ZX_DMA_WIDTH_64BIT = 3, +}; + +struct zx_desc_hw { + u32 saddr; + u32 daddr; + u32 src_x; + u32 src_zy; + u32 src_zy_step; + u32 dst_zy_step; + u32 reserved1; + u32 lli; + u32 ctr; + u32 reserved[7]; /* pack as hardware registers region size */ +} __aligned(32); + +struct zx_dma_desc_sw { + struct virt_dma_desc vd; + dma_addr_t desc_hw_lli; + size_t desc_num; + size_t size; + struct zx_desc_hw *desc_hw; +}; + +struct zx_dma_phy; + +struct zx_dma_chan { + struct dma_slave_config slave_cfg; + int id; /* Request phy chan id */ + u32 ccfg; + u32 cyclic; + struct virt_dma_chan vc; + struct zx_dma_phy *phy; + struct list_head node; + dma_addr_t dev_addr; + enum dma_status status; +}; + +struct zx_dma_phy { + u32 idx; + void __iomem *base; + struct zx_dma_chan *vchan; + struct zx_dma_desc_sw *ds_run; + struct zx_dma_desc_sw *ds_done; +}; + +struct zx_dma_dev { + struct dma_device slave; + void __iomem *base; + spinlock_t lock; /* lock for ch and phy */ + struct list_head chan_pending; + struct zx_dma_phy *phy; + struct zx_dma_chan *chans; + struct clk *clk; + struct dma_pool *pool; + u32 dma_channels; + u32 dma_requests; + int irq; +}; + +#define to_zx_dma(dmadev) container_of(dmadev, struct zx_dma_dev, slave) + +static struct zx_dma_chan *to_zx_chan(struct dma_chan *chan) +{ + return container_of(chan, struct zx_dma_chan, vc.chan); +} + +static void zx_dma_terminate_chan(struct zx_dma_phy *phy, struct zx_dma_dev *d) +{ + u32 val = 0; + + val = readl_relaxed(phy->base + REG_ZX_CTRL); + val &= ~ZX_CH_ENABLE; + val |= ZX_FORCE_CLOSE; + writel_relaxed(val, phy->base + REG_ZX_CTRL); + + val = 0x1 << phy->idx; + writel_relaxed(val, d->base + REG_ZX_TC_IRQ_RAW); + writel_relaxed(val, d->base + REG_ZX_SRC_ERR_IRQ_RAW); + writel_relaxed(val, d->base + REG_ZX_DST_ERR_IRQ_RAW); + writel_relaxed(val, d->base + REG_ZX_CFG_ERR_IRQ_RAW); +} + +static void zx_dma_set_desc(struct zx_dma_phy *phy, struct zx_desc_hw *hw) +{ + writel_relaxed(hw->saddr, phy->base + REG_ZX_SRC_ADDR); + writel_relaxed(hw->daddr, phy->base + REG_ZX_DST_ADDR); + writel_relaxed(hw->src_x, phy->base + REG_ZX_TX_X_COUNT); + writel_relaxed(0, phy->base + REG_ZX_TX_ZY_COUNT); + writel_relaxed(0, phy->base + REG_ZX_SRC_ZY_STEP); + writel_relaxed(0, phy->base + REG_ZX_DST_ZY_STEP); + writel_relaxed(hw->lli, phy->base + REG_ZX_LLI_ADDR); + writel_relaxed(hw->ctr, phy->base + REG_ZX_CTRL); +} + +static u32 zx_dma_get_curr_lli(struct zx_dma_phy *phy) +{ + return readl_relaxed(phy->base + REG_ZX_LLI_ADDR); +} + +static u32 zx_dma_get_chan_stat(struct zx_dma_dev *d) +{ + return readl_relaxed(d->base + REG_ZX_STATUS); +} + +static void zx_dma_init_state(struct zx_dma_dev *d) +{ + /* set same priority */ + writel_relaxed(0x0, d->base + REG_ZX_DMA_ARB); + /* clear all irq */ + writel_relaxed(0xffffffff, d->base + REG_ZX_TC_IRQ_RAW); + writel_relaxed(0xffffffff, d->base + REG_ZX_SRC_ERR_IRQ_RAW); + writel_relaxed(0xffffffff, d->base + REG_ZX_DST_ERR_IRQ_RAW); + writel_relaxed(0xffffffff, d->base + REG_ZX_CFG_ERR_IRQ_RAW); +} + +static int zx_dma_start_txd(struct zx_dma_chan *c) +{ + struct zx_dma_dev *d = to_zx_dma(c->vc.chan.device); + struct virt_dma_desc *vd = vchan_next_desc(&c->vc); + + if (!c->phy) + return -EAGAIN; + + if (BIT(c->phy->idx) & zx_dma_get_chan_stat(d)) + return -EAGAIN; + + if (vd) { + struct zx_dma_desc_sw *ds = + container_of(vd, struct zx_dma_desc_sw, vd); + /* + * fetch and remove request from vc->desc_issued + * so vc->desc_issued only contains desc pending + */ + list_del(&ds->vd.node); + c->phy->ds_run = ds; + c->phy->ds_done = NULL; + /* start dma */ + zx_dma_set_desc(c->phy, ds->desc_hw); + return 0; + } + c->phy->ds_done = NULL; + c->phy->ds_run = NULL; + return -EAGAIN; +} + +static void zx_dma_task(struct zx_dma_dev *d) +{ + struct zx_dma_phy *p; + struct zx_dma_chan *c, *cn; + unsigned pch, pch_alloc = 0; + unsigned long flags; + + /* check new dma request of running channel in vc->desc_issued */ + list_for_each_entry_safe(c, cn, &d->slave.channels, + vc.chan.device_node) { + spin_lock_irqsave(&c->vc.lock, flags); + p = c->phy; + if (p && p->ds_done && zx_dma_start_txd(c)) { + /* No current txd associated with this channel */ + dev_dbg(d->slave.dev, "pchan %u: free\n", p->idx); + /* Mark this channel free */ + c->phy = NULL; + p->vchan = NULL; + } + spin_unlock_irqrestore(&c->vc.lock, flags); + } + + /* check new channel request in d->chan_pending */ + spin_lock_irqsave(&d->lock, flags); + while (!list_empty(&d->chan_pending)) { + c = list_first_entry(&d->chan_pending, + struct zx_dma_chan, node); + p = &d->phy[c->id]; + if (!p->vchan) { + /* remove from d->chan_pending */ + list_del_init(&c->node); + pch_alloc |= 1 << c->id; + /* Mark this channel allocated */ + p->vchan = c; + c->phy = p; + } else { + dev_dbg(d->slave.dev, "pchan %u: busy!\n", c->id); + } + } + spin_unlock_irqrestore(&d->lock, flags); + + for (pch = 0; pch < d->dma_channels; pch++) { + if (pch_alloc & (1 << pch)) { + p = &d->phy[pch]; + c = p->vchan; + if (c) { + spin_lock_irqsave(&c->vc.lock, flags); + zx_dma_start_txd(c); + spin_unlock_irqrestore(&c->vc.lock, flags); + } + } + } +} + +static irqreturn_t zx_dma_int_handler(int irq, void *dev_id) +{ + struct zx_dma_dev *d = (struct zx_dma_dev *)dev_id; + struct zx_dma_phy *p; + struct zx_dma_chan *c; + u32 tc = readl_relaxed(d->base + REG_ZX_TC_IRQ); + u32 serr = readl_relaxed(d->base + REG_ZX_SRC_ERR_IRQ); + u32 derr = readl_relaxed(d->base + REG_ZX_DST_ERR_IRQ); + u32 cfg = readl_relaxed(d->base + REG_ZX_CFG_ERR_IRQ); + u32 i, irq_chan = 0, task = 0; + + while (tc) { + i = __ffs(tc); + tc &= ~BIT(i); + p = &d->phy[i]; + c = p->vchan; + if (c) { + unsigned long flags; + + spin_lock_irqsave(&c->vc.lock, flags); + if (c->cyclic) { + vchan_cyclic_callback(&p->ds_run->vd); + } else { + vchan_cookie_complete(&p->ds_run->vd); + p->ds_done = p->ds_run; + task = 1; + } + spin_unlock_irqrestore(&c->vc.lock, flags); + irq_chan |= BIT(i); + } + } + + if (serr || derr || cfg) + dev_warn(d->slave.dev, "DMA ERR src 0x%x, dst 0x%x, cfg 0x%x\n", + serr, derr, cfg); + + writel_relaxed(irq_chan, d->base + REG_ZX_TC_IRQ_RAW); + writel_relaxed(serr, d->base + REG_ZX_SRC_ERR_IRQ_RAW); + writel_relaxed(derr, d->base + REG_ZX_DST_ERR_IRQ_RAW); + writel_relaxed(cfg, d->base + REG_ZX_CFG_ERR_IRQ_RAW); + + if (task) + zx_dma_task(d); + return IRQ_HANDLED; +} + +static void zx_dma_free_chan_resources(struct dma_chan *chan) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_dev *d = to_zx_dma(chan->device); + unsigned long flags; + + spin_lock_irqsave(&d->lock, flags); + list_del_init(&c->node); + spin_unlock_irqrestore(&d->lock, flags); + + vchan_free_chan_resources(&c->vc); + c->ccfg = 0; +} + +static enum dma_status zx_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_phy *p; + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + size_t bytes = 0; + + ret = dma_cookie_status(&c->vc.chan, cookie, state); + if (ret == DMA_COMPLETE || !state) + return ret; + + spin_lock_irqsave(&c->vc.lock, flags); + p = c->phy; + ret = c->status; + + /* + * If the cookie is on our issue queue, then the residue is + * its total size. + */ + vd = vchan_find_desc(&c->vc, cookie); + if (vd) { + bytes = container_of(vd, struct zx_dma_desc_sw, vd)->size; + } else if ((!p) || (!p->ds_run)) { + bytes = 0; + } else { + struct zx_dma_desc_sw *ds = p->ds_run; + u32 clli = 0, index = 0; + + bytes = 0; + clli = zx_dma_get_curr_lli(p); + index = (clli - ds->desc_hw_lli) / sizeof(struct zx_desc_hw); + for (; index < ds->desc_num; index++) { + bytes += ds->desc_hw[index].src_x; + /* end of lli */ + if (!ds->desc_hw[index].lli) + break; + } + } + spin_unlock_irqrestore(&c->vc.lock, flags); + dma_set_residue(state, bytes); + return ret; +} + +static void zx_dma_issue_pending(struct dma_chan *chan) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_dev *d = to_zx_dma(chan->device); + unsigned long flags; + int issue = 0; + + spin_lock_irqsave(&c->vc.lock, flags); + /* add request to vc->desc_issued */ + if (vchan_issue_pending(&c->vc)) { + spin_lock(&d->lock); + if (!c->phy && list_empty(&c->node)) { + /* if new channel, add chan_pending */ + list_add_tail(&c->node, &d->chan_pending); + issue = 1; + dev_dbg(d->slave.dev, "vchan %p: issued\n", &c->vc); + } + spin_unlock(&d->lock); + } else { + dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", &c->vc); + } + spin_unlock_irqrestore(&c->vc.lock, flags); + + if (issue) + zx_dma_task(d); +} + +static void zx_dma_fill_desc(struct zx_dma_desc_sw *ds, dma_addr_t dst, + dma_addr_t src, size_t len, u32 num, u32 ccfg) +{ + if ((num + 1) < ds->desc_num) + ds->desc_hw[num].lli = ds->desc_hw_lli + (num + 1) * + sizeof(struct zx_desc_hw); + ds->desc_hw[num].saddr = src; + ds->desc_hw[num].daddr = dst; + ds->desc_hw[num].src_x = len; + ds->desc_hw[num].ctr = ccfg; +} + +static struct zx_dma_desc_sw *zx_alloc_desc_resource(int num, + struct dma_chan *chan) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_desc_sw *ds; + struct zx_dma_dev *d = to_zx_dma(chan->device); + int lli_limit = LLI_BLOCK_SIZE / sizeof(struct zx_desc_hw); + + if (num > lli_limit) { + dev_dbg(chan->device->dev, "vch %p: sg num %d exceed max %d\n", + &c->vc, num, lli_limit); + return NULL; + } + + ds = kzalloc(sizeof(*ds), GFP_ATOMIC); + if (!ds) + return NULL; + + ds->desc_hw = dma_pool_alloc(d->pool, GFP_NOWAIT, &ds->desc_hw_lli); + if (!ds->desc_hw) { + dev_dbg(chan->device->dev, "vch %p: dma alloc fail\n", &c->vc); + kfree(ds); + return NULL; + } + memset(ds->desc_hw, sizeof(struct zx_desc_hw) * num, 0); + ds->desc_num = num; + return ds; +} + +static enum zx_dma_burst_width zx_dma_burst_width(enum dma_slave_buswidth width) +{ + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + case DMA_SLAVE_BUSWIDTH_2_BYTES: + case DMA_SLAVE_BUSWIDTH_4_BYTES: + case DMA_SLAVE_BUSWIDTH_8_BYTES: + return ffs(width) - 1; + default: + return ZX_DMA_WIDTH_32BIT; + } +} + +static int zx_pre_config(struct zx_dma_chan *c, enum dma_transfer_direction dir) +{ + struct dma_slave_config *cfg = &c->slave_cfg; + enum zx_dma_burst_width src_width; + enum zx_dma_burst_width dst_width; + u32 maxburst = 0; + + switch (dir) { + case DMA_MEM_TO_MEM: + c->ccfg = ZX_CH_ENABLE | ZX_SOFT_REQ + | ZX_SRC_BURST_LEN(ZX_MAX_BURST_LEN - 1) + | ZX_SRC_BURST_WIDTH(ZX_DMA_WIDTH_32BIT) + | ZX_DST_BURST_WIDTH(ZX_DMA_WIDTH_32BIT); + break; + case DMA_MEM_TO_DEV: + c->dev_addr = cfg->dst_addr; + /* dst len is calculated from src width, len and dst width. + * We need make sure dst len not exceed MAX LEN. + * Trailing single transaction that does not fill a full + * burst also require identical src/dst data width. + */ + dst_width = zx_dma_burst_width(cfg->dst_addr_width); + maxburst = cfg->dst_maxburst; + maxburst = maxburst < ZX_MAX_BURST_LEN ? + maxburst : ZX_MAX_BURST_LEN; + c->ccfg = ZX_DST_FIFO_MODE | ZX_CH_ENABLE + | ZX_SRC_BURST_LEN(maxburst - 1) + | ZX_SRC_BURST_WIDTH(dst_width) + | ZX_DST_BURST_WIDTH(dst_width); + break; + case DMA_DEV_TO_MEM: + c->dev_addr = cfg->src_addr; + src_width = zx_dma_burst_width(cfg->src_addr_width); + maxburst = cfg->src_maxburst; + maxburst = maxburst < ZX_MAX_BURST_LEN ? + maxburst : ZX_MAX_BURST_LEN; + c->ccfg = ZX_SRC_FIFO_MODE | ZX_CH_ENABLE + | ZX_SRC_BURST_LEN(maxburst - 1) + | ZX_SRC_BURST_WIDTH(src_width) + | ZX_DST_BURST_WIDTH(src_width); + break; + default: + return -EINVAL; + } + return 0; +} + +static struct dma_async_tx_descriptor *zx_dma_prep_memcpy( + struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_desc_sw *ds; + size_t copy = 0; + int num = 0; + + if (!len) + return NULL; + + if (zx_pre_config(c, DMA_MEM_TO_MEM)) + return NULL; + + num = DIV_ROUND_UP(len, DMA_MAX_SIZE); + + ds = zx_alloc_desc_resource(num, chan); + if (!ds) + return NULL; + + ds->size = len; + num = 0; + + do { + copy = min_t(size_t, len, DMA_MAX_SIZE); + zx_dma_fill_desc(ds, dst, src, copy, num++, c->ccfg); + + src += copy; + dst += copy; + len -= copy; + } while (len); + + c->cyclic = 0; + ds->desc_hw[num - 1].lli = 0; /* end of link */ + ds->desc_hw[num - 1].ctr |= ZX_IRQ_ENABLE_ALL; + return vchan_tx_prep(&c->vc, &ds->vd, flags); +} + +static struct dma_async_tx_descriptor *zx_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, unsigned int sglen, + enum dma_transfer_direction dir, unsigned long flags, void *context) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_desc_sw *ds; + size_t len, avail, total = 0; + struct scatterlist *sg; + dma_addr_t addr, src = 0, dst = 0; + int num = sglen, i; + + if (!sgl) + return NULL; + + if (zx_pre_config(c, dir)) + return NULL; + + for_each_sg(sgl, sg, sglen, i) { + avail = sg_dma_len(sg); + if (avail > DMA_MAX_SIZE) + num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1; + } + + ds = zx_alloc_desc_resource(num, chan); + if (!ds) + return NULL; + + c->cyclic = 0; + num = 0; + for_each_sg(sgl, sg, sglen, i) { + addr = sg_dma_address(sg); + avail = sg_dma_len(sg); + total += avail; + + do { + len = min_t(size_t, avail, DMA_MAX_SIZE); + + if (dir == DMA_MEM_TO_DEV) { + src = addr; + dst = c->dev_addr; + } else if (dir == DMA_DEV_TO_MEM) { + src = c->dev_addr; + dst = addr; + } + + zx_dma_fill_desc(ds, dst, src, len, num++, c->ccfg); + + addr += len; + avail -= len; + } while (avail); + } + + ds->desc_hw[num - 1].lli = 0; /* end of link */ + ds->desc_hw[num - 1].ctr |= ZX_IRQ_ENABLE_ALL; + ds->size = total; + return vchan_tx_prep(&c->vc, &ds->vd, flags); +} + +static struct dma_async_tx_descriptor *zx_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction dir, + unsigned long flags) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_desc_sw *ds; + dma_addr_t src = 0, dst = 0; + int num_periods = buf_len / period_len; + int buf = 0, num = 0; + + if (period_len > DMA_MAX_SIZE) { + dev_err(chan->device->dev, "maximum period size exceeded\n"); + return NULL; + } + + if (zx_pre_config(c, dir)) + return NULL; + + ds = zx_alloc_desc_resource(num_periods, chan); + if (!ds) + return NULL; + c->cyclic = 1; + + while (buf < buf_len) { + if (dir == DMA_MEM_TO_DEV) { + src = dma_addr; + dst = c->dev_addr; + } else if (dir == DMA_DEV_TO_MEM) { + src = c->dev_addr; + dst = dma_addr; + } + zx_dma_fill_desc(ds, dst, src, period_len, num++, + c->ccfg | ZX_IRQ_ENABLE_ALL); + dma_addr += period_len; + buf += period_len; + } + + ds->desc_hw[num - 1].lli = ds->desc_hw_lli; + ds->size = buf_len; + return vchan_tx_prep(&c->vc, &ds->vd, flags); +} + +static int zx_dma_config(struct dma_chan *chan, + struct dma_slave_config *cfg) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + + if (!cfg) + return -EINVAL; + + memcpy(&c->slave_cfg, cfg, sizeof(*cfg)); + + return 0; +} + +static int zx_dma_terminate_all(struct dma_chan *chan) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_dev *d = to_zx_dma(chan->device); + struct zx_dma_phy *p = c->phy; + unsigned long flags; + LIST_HEAD(head); + + dev_dbg(d->slave.dev, "vchan %p: terminate all\n", &c->vc); + + /* Prevent this channel being scheduled */ + spin_lock(&d->lock); + list_del_init(&c->node); + spin_unlock(&d->lock); + + /* Clear the tx descriptor lists */ + spin_lock_irqsave(&c->vc.lock, flags); + vchan_get_all_descriptors(&c->vc, &head); + if (p) { + /* vchan is assigned to a pchan - stop the channel */ + zx_dma_terminate_chan(p, d); + c->phy = NULL; + p->vchan = NULL; + p->ds_run = NULL; + p->ds_done = NULL; + } + spin_unlock_irqrestore(&c->vc.lock, flags); + vchan_dma_desc_free_list(&c->vc, &head); + + return 0; +} + +static int zx_dma_transfer_pause(struct dma_chan *chan) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + u32 val = 0; + + val = readl_relaxed(c->phy->base + REG_ZX_CTRL); + val &= ~ZX_CH_ENABLE; + writel_relaxed(val, c->phy->base + REG_ZX_CTRL); + + return 0; +} + +static int zx_dma_transfer_resume(struct dma_chan *chan) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + u32 val = 0; + + val = readl_relaxed(c->phy->base + REG_ZX_CTRL); + val |= ZX_CH_ENABLE; + writel_relaxed(val, c->phy->base + REG_ZX_CTRL); + + return 0; +} + +static void zx_dma_free_desc(struct virt_dma_desc *vd) +{ + struct zx_dma_desc_sw *ds = + container_of(vd, struct zx_dma_desc_sw, vd); + struct zx_dma_dev *d = to_zx_dma(vd->tx.chan->device); + + dma_pool_free(d->pool, ds->desc_hw, ds->desc_hw_lli); + kfree(ds); +} + +static const struct of_device_id zx6702_dma_dt_ids[] = { + { .compatible = "zte,zx296702-dma", }, + {} +}; +MODULE_DEVICE_TABLE(of, zx6702_dma_dt_ids); + +static struct dma_chan *zx_of_dma_simple_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct zx_dma_dev *d = ofdma->of_dma_data; + unsigned int request = dma_spec->args[0]; + struct dma_chan *chan; + struct zx_dma_chan *c; + + if (request > d->dma_requests) + return NULL; + + chan = dma_get_any_slave_channel(&d->slave); + if (!chan) { + dev_err(d->slave.dev, "get channel fail in %s.\n", __func__); + return NULL; + } + c = to_zx_chan(chan); + c->id = request; + dev_info(d->slave.dev, "zx_dma: pchan %u: alloc vchan %p\n", + c->id, &c->vc); + return chan; +} + +static int zx_dma_probe(struct platform_device *op) +{ + struct zx_dma_dev *d; + struct resource *iores; + int i, ret = 0; + + iores = platform_get_resource(op, IORESOURCE_MEM, 0); + if (!iores) + return -EINVAL; + + d = devm_kzalloc(&op->dev, sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->base = devm_ioremap_resource(&op->dev, iores); + if (IS_ERR(d->base)) + return PTR_ERR(d->base); + + of_property_read_u32((&op->dev)->of_node, + "dma-channels", &d->dma_channels); + of_property_read_u32((&op->dev)->of_node, + "dma-requests", &d->dma_requests); + if (!d->dma_requests || !d->dma_channels) + return -EINVAL; + + d->clk = devm_clk_get(&op->dev, NULL); + if (IS_ERR(d->clk)) { + dev_err(&op->dev, "no dma clk\n"); + return PTR_ERR(d->clk); + } + + d->irq = platform_get_irq(op, 0); + ret = devm_request_irq(&op->dev, d->irq, zx_dma_int_handler, + 0, DRIVER_NAME, d); + if (ret) + return ret; + + /* A DMA memory pool for LLIs, align on 32-byte boundary */ + d->pool = dmam_pool_create(DRIVER_NAME, &op->dev, + LLI_BLOCK_SIZE, 32, 0); + if (!d->pool) + return -ENOMEM; + + /* init phy channel */ + d->phy = devm_kzalloc(&op->dev, + d->dma_channels * sizeof(struct zx_dma_phy), GFP_KERNEL); + if (!d->phy) + return -ENOMEM; + + for (i = 0; i < d->dma_channels; i++) { + struct zx_dma_phy *p = &d->phy[i]; + + p->idx = i; + p->base = d->base + i * 0x40; + } + + INIT_LIST_HEAD(&d->slave.channels); + dma_cap_set(DMA_SLAVE, d->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, d->slave.cap_mask); + dma_cap_set(DMA_PRIVATE, d->slave.cap_mask); + d->slave.dev = &op->dev; + d->slave.device_free_chan_resources = zx_dma_free_chan_resources; + d->slave.device_tx_status = zx_dma_tx_status; + d->slave.device_prep_dma_memcpy = zx_dma_prep_memcpy; + d->slave.device_prep_slave_sg = zx_dma_prep_slave_sg; + d->slave.device_prep_dma_cyclic = zx_dma_prep_dma_cyclic; + d->slave.device_issue_pending = zx_dma_issue_pending; + d->slave.device_config = zx_dma_config; + d->slave.device_terminate_all = zx_dma_terminate_all; + d->slave.device_pause = zx_dma_transfer_pause; + d->slave.device_resume = zx_dma_transfer_resume; + d->slave.copy_align = DMA_ALIGN; + d->slave.src_addr_widths = ZX_DMA_BUSWIDTHS; + d->slave.dst_addr_widths = ZX_DMA_BUSWIDTHS; + d->slave.directions = BIT(DMA_MEM_TO_MEM) | BIT(DMA_MEM_TO_DEV) + | BIT(DMA_DEV_TO_MEM); + d->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + + /* init virtual channel */ + d->chans = devm_kzalloc(&op->dev, + d->dma_requests * sizeof(struct zx_dma_chan), GFP_KERNEL); + if (!d->chans) + return -ENOMEM; + + for (i = 0; i < d->dma_requests; i++) { + struct zx_dma_chan *c = &d->chans[i]; + + c->status = DMA_IN_PROGRESS; + INIT_LIST_HEAD(&c->node); + c->vc.desc_free = zx_dma_free_desc; + vchan_init(&c->vc, &d->slave); + } + + /* Enable clock before accessing registers */ + ret = clk_prepare_enable(d->clk); + if (ret < 0) { + dev_err(&op->dev, "clk_prepare_enable failed: %d\n", ret); + goto zx_dma_out; + } + + zx_dma_init_state(d); + + spin_lock_init(&d->lock); + INIT_LIST_HEAD(&d->chan_pending); + platform_set_drvdata(op, d); + + ret = dma_async_device_register(&d->slave); + if (ret) + goto clk_dis; + + ret = of_dma_controller_register((&op->dev)->of_node, + zx_of_dma_simple_xlate, d); + if (ret) + goto of_dma_register_fail; + + dev_info(&op->dev, "initialized\n"); + return 0; + +of_dma_register_fail: + dma_async_device_unregister(&d->slave); +clk_dis: + clk_disable_unprepare(d->clk); +zx_dma_out: + return ret; +} + +static int zx_dma_remove(struct platform_device *op) +{ + struct zx_dma_chan *c, *cn; + struct zx_dma_dev *d = platform_get_drvdata(op); + + /* explictly free the irq */ + devm_free_irq(&op->dev, d->irq, d); + + dma_async_device_unregister(&d->slave); + of_dma_controller_free((&op->dev)->of_node); + + list_for_each_entry_safe(c, cn, &d->slave.channels, + vc.chan.device_node) { + list_del(&c->vc.chan.device_node); + } + clk_disable_unprepare(d->clk); + dmam_pool_destroy(d->pool); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int zx_dma_suspend_dev(struct device *dev) +{ + struct zx_dma_dev *d = dev_get_drvdata(dev); + u32 stat = 0; + + stat = zx_dma_get_chan_stat(d); + if (stat) { + dev_warn(d->slave.dev, + "chan %d is running fail to suspend\n", stat); + return -1; + } + clk_disable_unprepare(d->clk); + return 0; +} + +static int zx_dma_resume_dev(struct device *dev) +{ + struct zx_dma_dev *d = dev_get_drvdata(dev); + int ret = 0; + + ret = clk_prepare_enable(d->clk); + if (ret < 0) { + dev_err(d->slave.dev, "clk_prepare_enable failed: %d\n", ret); + return ret; + } + zx_dma_init_state(d); + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(zx_dma_pmops, zx_dma_suspend_dev, zx_dma_resume_dev); + +static struct platform_driver zx_pdma_driver = { + .driver = { + .name = DRIVER_NAME, + .pm = &zx_dma_pmops, + .of_match_table = zx6702_dma_dt_ids, + }, + .probe = zx_dma_probe, + .remove = zx_dma_remove, +}; + +module_platform_driver(zx_pdma_driver); + +MODULE_DESCRIPTION("ZTE ZX296702 DMA Driver"); +MODULE_AUTHOR("Jun Nie jun.nie@linaro.org"); +MODULE_LICENSE("GPL v2"); |