diff options
534 files changed, 20565 insertions, 13674 deletions
@@ -495,6 +495,11 @@ S: Kopmansg 2 S: 411 13 Goteborg S: Sweden +N: Paul Bristow +E: paul@paulbristow.net +W: http://paulbristow.net/linux/idefloppy.html +D: Maintainer of IDE/ATAPI floppy driver + N: Dominik Brodowski E: linux@brodo.de W: http://www.brodo.de/ @@ -2642,6 +2647,10 @@ S: C/ Mieses 20, 9-B S: Valladolid 47009 S: Spain +N: Gadi Oxman +E: gadio@netvision.net.il +D: Original author and maintainer of IDE/ATAPI floppy/tape drivers + N: Greg Page E: gpage@sovereign.org D: IPX development and support diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index e638e15a8895..97ad190e13af 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -41,6 +41,49 @@ Description: for the device and attempt to bind to it. For example: # echo "8086 10f5" > /sys/bus/pci/drivers/foo/new_id +What: /sys/bus/pci/drivers/.../remove_id +Date: February 2009 +Contact: Chris Wright <chrisw@sous-sol.org> +Description: + Writing a device ID to this file will remove an ID + that was dynamically added via the new_id sysfs entry. + The format for the device ID is: + VVVV DDDD SVVV SDDD CCCC MMMM. That is Vendor ID, Device + ID, Subsystem Vendor ID, Subsystem Device ID, Class, + and Class Mask. The Vendor ID and Device ID fields are + required, the rest are optional. After successfully + removing an ID, the driver will no longer support the + device. This is useful to ensure auto probing won't + match the driver to the device. For example: + # echo "8086 10f5" > /sys/bus/pci/drivers/foo/remove_id + +What: /sys/bus/pci/rescan +Date: January 2009 +Contact: Linux PCI developers <linux-pci@vger.kernel.org> +Description: + Writing a non-zero value to this attribute will + force a rescan of all PCI buses in the system, and + re-discover previously removed devices. + Depends on CONFIG_HOTPLUG. + +What: /sys/bus/pci/devices/.../remove +Date: January 2009 +Contact: Linux PCI developers <linux-pci@vger.kernel.org> +Description: + Writing a non-zero value to this attribute will + hot-remove the PCI device and any of its children. + Depends on CONFIG_HOTPLUG. + +What: /sys/bus/pci/devices/.../rescan +Date: January 2009 +Contact: Linux PCI developers <linux-pci@vger.kernel.org> +Description: + Writing a non-zero value to this attribute will + force a rescan of the device's parent bus and all + child buses, and re-discover devices removed earlier + from this part of the device tree. + Depends on CONFIG_HOTPLUG. + What: /sys/bus/pci/devices/.../vpd Date: February 2008 Contact: Ben Hutchings <bhutchings@solarflare.com> @@ -52,3 +95,30 @@ Description: that some devices may have malformatted data. If the underlying VPD has a writable section then the corresponding section of this file will be writable. + +What: /sys/bus/pci/devices/.../virtfnN +Date: March 2009 +Contact: Yu Zhao <yu.zhao@intel.com> +Description: + This symbolic link appears when hardware supports the SR-IOV + capability and the Physical Function driver has enabled it. + The symbolic link points to the PCI device sysfs entry of the + Virtual Function whose index is N (0...MaxVFs-1). + +What: /sys/bus/pci/devices/.../dep_link +Date: March 2009 +Contact: Yu Zhao <yu.zhao@intel.com> +Description: + This symbolic link appears when hardware supports the SR-IOV + capability and the Physical Function driver has enabled it, + and this device has vendor specific dependencies with others. + The symbolic link points to the PCI device sysfs entry of + Physical Function this device depends on. + +What: /sys/bus/pci/devices/.../physfn +Date: March 2009 +Contact: Yu Zhao <yu.zhao@intel.com> +Description: + This symbolic link appears when a device is a Virtual Function. + The symbolic link points to the PCI device sysfs entry of the + Physical Function this device associates with. diff --git a/Documentation/ABI/testing/sysfs-fs-ext4 b/Documentation/ABI/testing/sysfs-fs-ext4 new file mode 100644 index 000000000000..4e79074de282 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-fs-ext4 @@ -0,0 +1,81 @@ +What: /sys/fs/ext4/<disk>/mb_stats +Date: March 2008 +Contact: "Theodore Ts'o" <tytso@mit.edu> +Description: + Controls whether the multiblock allocator should + collect statistics, which are shown during the unmount. + 1 means to collect statistics, 0 means not to collect + statistics + +What: /sys/fs/ext4/<disk>/mb_group_prealloc +Date: March 2008 +Contact: "Theodore Ts'o" <tytso@mit.edu> +Description: + The multiblock allocator will round up allocation + requests to a multiple of this tuning parameter if the + stripe size is not set in the ext4 superblock + +What: /sys/fs/ext4/<disk>/mb_max_to_scan +Date: March 2008 +Contact: "Theodore Ts'o" <tytso@mit.edu> +Description: + The maximum number of extents the multiblock allocator + will search to find the best extent + +What: /sys/fs/ext4/<disk>/mb_min_to_scan +Date: March 2008 +Contact: "Theodore Ts'o" <tytso@mit.edu> +Description: + The minimum number of extents the multiblock allocator + will search to find the best extent + +What: /sys/fs/ext4/<disk>/mb_order2_req +Date: March 2008 +Contact: "Theodore Ts'o" <tytso@mit.edu> +Description: + Tuning parameter which controls the minimum size for + requests (as a power of 2) where the buddy cache is + used + +What: /sys/fs/ext4/<disk>/mb_stream_req +Date: March 2008 +Contact: "Theodore Ts'o" <tytso@mit.edu> +Description: + Files which have fewer blocks than this tunable + parameter will have their blocks allocated out of a + block group specific preallocation pool, so that small + files are packed closely together. Each large file + will have its blocks allocated out of its own unique + preallocation pool. + +What: /sys/fs/ext4/<disk>/inode_readahead +Date: March 2008 +Contact: "Theodore Ts'o" <tytso@mit.edu> +Description: + Tuning parameter which controls the maximum number of + inode table blocks that ext4's inode table readahead + algorithm will pre-read into the buffer cache + +What: /sys/fs/ext4/<disk>/delayed_allocation_blocks +Date: March 2008 +Contact: "Theodore Ts'o" <tytso@mit.edu> +Description: + This file is read-only and shows the number of blocks + that are dirty in the page cache, but which do not + have their location in the filesystem allocated yet. + +What: /sys/fs/ext4/<disk>/lifetime_write_kbytes +Date: March 2008 +Contact: "Theodore Ts'o" <tytso@mit.edu> +Description: + This file is read-only and shows the number of kilobytes + of data that have been written to this filesystem since it was + created. + +What: /sys/fs/ext4/<disk>/session_write_kbytes +Date: March 2008 +Contact: "Theodore Ts'o" <tytso@mit.edu> +Description: + This file is read-only and shows the number of + kilobytes of data that have been written to this + filesystem since it was mounted. diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index bc962cda6504..58c194572c76 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -199,6 +199,7 @@ X!Edrivers/pci/hotplug.c --> !Edrivers/pci/probe.c !Edrivers/pci/rom.c +!Edrivers/pci/iov.c </sect1> <sect1><title>PCI Hotplug Support Library</title> !Edrivers/pci/hotplug/pci_hotplug_core.c diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 256defd7e174..dcf7acc720e1 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -4,506 +4,356 @@ Revised Feb 12, 2004 by Martine Silbermann email: Martine.Silbermann@hp.com Revised Jun 25, 2004 by Tom L Nguyen + Revised Jul 9, 2008 by Matthew Wilcox <willy@linux.intel.com> + Copyright 2003, 2008 Intel Corporation 1. About this guide -This guide describes the basics of Message Signaled Interrupts (MSI), -the advantages of using MSI over traditional interrupt mechanisms, -and how to enable your driver to use MSI or MSI-X. Also included is -a Frequently Asked Questions (FAQ) section. - -1.1 Terminology - -PCI devices can be single-function or multi-function. In either case, -when this text talks about enabling or disabling MSI on a "device -function," it is referring to one specific PCI device and function and -not to all functions on a PCI device (unless the PCI device has only -one function). - -2. Copyright 2003 Intel Corporation - -3. What is MSI/MSI-X? - -Message Signaled Interrupt (MSI), as described in the PCI Local Bus -Specification Revision 2.3 or later, is an optional feature, and a -required feature for PCI Express devices. MSI enables a device function -to request service by sending an Inbound Memory Write on its PCI bus to -the FSB as a Message Signal Interrupt transaction. Because MSI is -generated in the form of a Memory Write, all transaction conditions, -such as a Retry, Master-Abort, Target-Abort or normal completion, are -supported. - -A PCI device that supports MSI must also support pin IRQ assertion -interrupt mechanism to provide backward compatibility for systems that -do not support MSI. In systems which support MSI, the bus driver is -responsible for initializing the message address and message data of -the device function's MSI/MSI-X capability structure during device -initial configuration. - -An MSI capable device function indicates MSI support by implementing -the MSI/MSI-X capability structure in its PCI capability list. The -device function may implement both the MSI capability structure and -the MSI-X capability structure; however, the bus driver should not -enable both. - -The MSI capability structure contains Message Control register, -Message Address register and Message Data register. These registers -provide the bus driver control over MSI. The Message Control register -indicates the MSI capability supported by the device. The Message -Address register specifies the target address and the Message Data -register specifies the characteristics of the message. To request -service, the device function writes the content of the Message Data -register to the target address. The device and its software driver -are prohibited from writing to these registers. - -The MSI-X capability structure is an optional extension to MSI. It -uses an independent and separate capability structure. There are -some key advantages to implementing the MSI-X capability structure -over the MSI capability structure as described below. - - - Support a larger maximum number of vectors per function. - - - Provide the ability for system software to configure - each vector with an independent message address and message - data, specified by a table that resides in Memory Space. - - - MSI and MSI-X both support per-vector masking. Per-vector - masking is an optional extension of MSI but a required - feature for MSI-X. Per-vector masking provides the kernel the - ability to mask/unmask a single MSI while running its - interrupt service routine. If per-vector masking is - not supported, then the device driver should provide the - hardware/software synchronization to ensure that the device - generates MSI when the driver wants it to do so. - -4. Why use MSI? - -As a benefit to the simplification of board design, MSI allows board -designers to remove out-of-band interrupt routing. MSI is another -step towards a legacy-free environment. - -Due to increasing pressure on chipset and processor packages to -reduce pin count, the need for interrupt pins is expected to -diminish over time. Devices, due to pin constraints, may implement -messages to increase performance. - -PCI Express endpoints uses INTx emulation (in-band messages) instead -of IRQ pin assertion. Using INTx emulation requires interrupt -sharing among devices connected to the same node (PCI bridge) while -MSI is unique (non-shared) and does not require BIOS configuration -support. As a result, the PCI Express technology requires MSI -support for better interrupt performance. - -Using MSI enables the device functions to support two or more -vectors, which can be configured to target different CPUs to -increase scalability. - -5. Configuring a driver to use MSI/MSI-X - -By default, the kernel will not enable MSI/MSI-X on all devices that -support this capability. The CONFIG_PCI_MSI kernel option -must be selected to enable MSI/MSI-X support. - -5.1 Including MSI/MSI-X support into the kernel - -To allow MSI/MSI-X capable device drivers to selectively enable -MSI/MSI-X (using pci_enable_msi()/pci_enable_msix() as described -below), the VECTOR based scheme needs to be enabled by setting -CONFIG_PCI_MSI during kernel config. - -Since the target of the inbound message is the local APIC, providing -CONFIG_X86_LOCAL_APIC must be enabled as well as CONFIG_PCI_MSI. - -5.2 Configuring for MSI support - -Due to the non-contiguous fashion in vector assignment of the -existing Linux kernel, this version does not support multiple -messages regardless of a device function is capable of supporting -more than one vector. To enable MSI on a device function's MSI -capability structure requires a device driver to call the function -pci_enable_msi() explicitly. - -5.2.1 API pci_enable_msi +This guide describes the basics of Message Signaled Interrupts (MSIs), +the advantages of using MSI over traditional interrupt mechanisms, how +to change your driver to use MSI or MSI-X and some basic diagnostics to +try if a device doesn't support MSIs. -int pci_enable_msi(struct pci_dev *dev) -With this new API, a device driver that wants to have MSI -enabled on its device function must call this API to enable MSI. -A successful call will initialize the MSI capability structure -with ONE vector, regardless of whether a device function is -capable of supporting multiple messages. This vector replaces the -pre-assigned dev->irq with a new MSI vector. To avoid a conflict -of the new assigned vector with existing pre-assigned vector requires -a device driver to call this API before calling request_irq(). +2. What are MSIs? -5.2.2 API pci_disable_msi +A Message Signaled Interrupt is a write from the device to a special +address which causes an interrupt to be received by the CPU. -void pci_disable_msi(struct pci_dev *dev) +The MSI capability was first specified in PCI 2.2 and was later enhanced +in PCI 3.0 to allow each interrupt to be masked individually. The MSI-X +capability was also introduced with PCI 3.0. It supports more interrupts +per device than MSI and allows interrupts to be independently configured. -This API should always be used to undo the effect of pci_enable_msi() -when a device driver is unloading. This API restores dev->irq with -the pre-assigned IOAPIC vector and switches a device's interrupt -mode to PCI pin-irq assertion/INTx emulation mode. - -Note that a device driver should always call free_irq() on the MSI vector -that it has done request_irq() on before calling this API. Failure to do -so results in a BUG_ON() and a device will be left with MSI enabled and -leaks its vector. - -5.2.3 MSI mode vs. legacy mode diagram - -The below diagram shows the events which switch the interrupt -mode on the MSI-capable device function between MSI mode and -PIN-IRQ assertion mode. - - ------------ pci_enable_msi ------------------------ - | | <=============== | | - | MSI MODE | | PIN-IRQ ASSERTION MODE | - | | ===============> | | - ------------ pci_disable_msi ------------------------ - - -Figure 1. MSI Mode vs. Legacy Mode - -In Figure 1, a device operates by default in legacy mode. Legacy -in this context means PCI pin-irq assertion or PCI-Express INTx -emulation. A successful MSI request (using pci_enable_msi()) switches -a device's interrupt mode to MSI mode. A pre-assigned IOAPIC vector -stored in dev->irq will be saved by the PCI subsystem and a new -assigned MSI vector will replace dev->irq. - -To return back to its default mode, a device driver should always call -pci_disable_msi() to undo the effect of pci_enable_msi(). Note that a -device driver should always call free_irq() on the MSI vector it has -done request_irq() on before calling pci_disable_msi(). Failure to do -so results in a BUG_ON() and a device will be left with MSI enabled and -leaks its vector. Otherwise, the PCI subsystem restores a device's -dev->irq with a pre-assigned IOAPIC vector and marks the released -MSI vector as unused. - -Once being marked as unused, there is no guarantee that the PCI -subsystem will reserve this MSI vector for a device. Depending on -the availability of current PCI vector resources and the number of -MSI/MSI-X requests from other drivers, this MSI may be re-assigned. - -For the case where the PCI subsystem re-assigns this MSI vector to -another driver, a request to switch back to MSI mode may result -in being assigned a different MSI vector or a failure if no more -vectors are available. - -5.3 Configuring for MSI-X support - -Due to the ability of the system software to configure each vector of -the MSI-X capability structure with an independent message address -and message data, the non-contiguous fashion in vector assignment of -the existing Linux kernel has no impact on supporting multiple -messages on an MSI-X capable device functions. To enable MSI-X on -a device function's MSI-X capability structure requires its device -driver to call the function pci_enable_msix() explicitly. - -The function pci_enable_msix(), once invoked, enables either -all or nothing, depending on the current availability of PCI vector -resources. If the PCI vector resources are available for the number -of vectors requested by a device driver, this function will configure -the MSI-X table of the MSI-X capability structure of a device with -requested messages. To emphasize this reason, for example, a device -may be capable for supporting the maximum of 32 vectors while its -software driver usually may request 4 vectors. It is recommended -that the device driver should call this function once during the -initialization phase of the device driver. - -Unlike the function pci_enable_msi(), the function pci_enable_msix() -does not replace the pre-assigned IOAPIC dev->irq with a new MSI -vector because the PCI subsystem writes the 1:1 vector-to-entry mapping -into the field vector of each element contained in a second argument. -Note that the pre-assigned IOAPIC dev->irq is valid only if the device -operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt at -using dev->irq by the device driver to request for interrupt service -may result in unpredictable behavior. - -For each MSI-X vector granted, a device driver is responsible for calling -other functions like request_irq(), enable_irq(), etc. to enable -this vector with its corresponding interrupt service handler. It is -a device driver's choice to assign all vectors with the same -interrupt service handler or each vector with a unique interrupt -service handler. - -5.3.1 Handling MMIO address space of MSI-X Table - -The PCI 3.0 specification has implementation notes that MMIO address -space for a device's MSI-X structure should be isolated so that the -software system can set different pages for controlling accesses to the -MSI-X structure. The implementation of MSI support requires the PCI -subsystem, not a device driver, to maintain full control of the MSI-X -table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X -table/MSI-X PBA. A device driver should not access the MMIO address -space of the MSI-X table/MSI-X PBA. - -5.3.2 API pci_enable_msix +Devices may support both MSI and MSI-X, but only one can be enabled at +a time. -int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) -This API enables a device driver to request the PCI subsystem -to enable MSI-X messages on its hardware device. Depending on -the availability of PCI vectors resources, the PCI subsystem enables -either all or none of the requested vectors. +3. Why use MSIs? + +There are three reasons why using MSIs can give an advantage over +traditional pin-based interrupts. + +Pin-based PCI interrupts are often shared amongst several devices. +To support this, the kernel must call each interrupt handler associated +with an interrupt, which leads to reduced performance for the system as +a whole. MSIs are never shared, so this problem cannot arise. + +When a device writes data to memory, then raises a pin-based interrupt, +it is possible that the interrupt may arrive before all the data has +arrived in memory (this becomes more likely with devices behind PCI-PCI +bridges). In order to ensure that all the data has arrived in memory, +the interrupt handler must read a register on the device which raised +the interrupt. PCI transaction ordering rules require that all the data +arrives in memory before the value can be returned from the register. +Using MSIs avoids this problem as the interrupt-generating write cannot +pass the data writes, so by the time the interrupt is raised, the driver +knows that all the data has arrived in memory. + +PCI devices can only support a single pin-based interrupt per function. +Often drivers have to query the device to find out what event has +occurred, slowing down interrupt handling for the common case. With +MSIs, a device can support more interrupts, allowing each interrupt +to be specialised to a different purpose. One possible design gives +infrequent conditions (such as errors) their own interrupt which allows +the driver to handle the normal interrupt handling path more efficiently. +Other possible designs include giving one interrupt to each packet queue +in a network card or each port in a storage controller. + + +4. How to use MSIs + +PCI devices are initialised to use pin-based interrupts. The device +driver has to set up the device to use MSI or MSI-X. Not all machines +support MSIs correctly, and for those machines, the APIs described below +will simply fail and the device will continue to use pin-based interrupts. + +4.1 Include kernel support for MSIs + +To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI +option enabled. This option is only available on some architectures, +and it may depend on some other options also being set. For example, +on x86, you must also enable X86_UP_APIC or SMP in order to see the +CONFIG_PCI_MSI option. + +4.2 Using MSI + +Most of the hard work is done for the driver in the PCI layer. It simply +has to request that the PCI layer set up the MSI capability for this +device. + +4.2.1 pci_enable_msi + +int pci_enable_msi(struct pci_dev *dev) + +A successful call will allocate ONE interrupt to the device, regardless +of how many MSIs the device supports. The device will be switched from +pin-based interrupt mode to MSI mode. The dev->irq number is changed +to a new number which represents the message signaled interrupt. +This function should be called before the driver calls request_irq() +since enabling MSIs disables the pin-based IRQ and the driver will not +receive interrupts on the old interrupt. + +4.2.2 pci_enable_msi_block + +int pci_enable_msi_block(struct pci_dev *dev, int count) + +This variation on the above call allows a device driver to request multiple +MSIs. The MSI specification only allows interrupts to be allocated in +powers of two, up to a maximum of 2^5 (32). + +If this function returns 0, it has succeeded in allocating at least as many +interrupts as the driver requested (it may have allocated more in order +to satisfy the power-of-two requirement). In this case, the function +enables MSI on this device and updates dev->irq to be the lowest of +the new interrupts assigned to it. The other interrupts assigned to +the device are in the range dev->irq to dev->irq + count - 1. + +If this function returns a negative number, it indicates an error and +the driver should not attempt to request any more MSI interrupts for +this device. If this function returns a positive number, it will be +less than 'count' and indicate the number of interrupts that could have +been allocated. In neither case will the irq value have been +updated, nor will the device have been switched into MSI mode. + +The device driver must decide what action to take if +pci_enable_msi_block() returns a value less than the number asked for. +Some devices can make use of fewer interrupts than the maximum they +request; in this case the driver should call pci_enable_msi_block() +again. Note that it is not guaranteed to succeed, even when the +'count' has been reduced to the value returned from a previous call to +pci_enable_msi_block(). This is because there are multiple constraints +on the number of vectors that can be allocated; pci_enable_msi_block() +will return as soon as it finds any constraint that doesn't allow the +call to succeed. + +4.2.3 pci_disable_msi + +void pci_disable_msi(struct pci_dev *dev) -Argument 'dev' points to the device (pci_dev) structure. +This function should be used to undo the effect of pci_enable_msi() or +pci_enable_msi_block(). Calling it restores dev->irq to the pin-based +interrupt number and frees the previously allocated message signaled +interrupt(s). The interrupt may subsequently be assigned to another +device, so drivers should not cache the value of dev->irq. -Argument 'entries' is a pointer to an array of msix_entry structs. -The number of entries is indicated in argument 'nvec'. -struct msix_entry is defined in /driver/pci/msi.h: +A device driver must always call free_irq() on the interrupt(s) +for which it has called request_irq() before calling this function. +Failure to do so will result in a BUG_ON(), the device will be left with +MSI enabled and will leak its vector. + +4.3 Using MSI-X + +The MSI-X capability is much more flexible than the MSI capability. +It supports up to 2048 interrupts, each of which can be controlled +independently. To support this flexibility, drivers must use an array of +`struct msix_entry': struct msix_entry { u16 vector; /* kernel uses to write alloc vector */ u16 entry; /* driver uses to specify entry */ }; -A device driver is responsible for initializing the field 'entry' of -each element with a unique entry supported by MSI-X table. Otherwise, --EINVAL will be returned as a result. A successful return of zero -indicates the PCI subsystem completed initializing each of the requested -entries of the MSI-X table with message address and message data. -Last but not least, the PCI subsystem will write the 1:1 -vector-to-entry mapping into the field 'vector' of each element. A -device driver is responsible for keeping track of allocated MSI-X -vectors in its internal data structure. - -A return of zero indicates that the number of MSI-X vectors was -successfully allocated. A return of greater than zero indicates -MSI-X vector shortage. Or a return of less than zero indicates -a failure. This failure may be a result of duplicate entries -specified in second argument, or a result of no available vector, -or a result of failing to initialize MSI-X table entries. - -5.3.3 API pci_disable_msix +This allows for the device to use these interrupts in a sparse fashion; +for example it could use interrupts 3 and 1027 and allocate only a +two-element array. The driver is expected to fill in the 'entry' value +in each element of the array to indicate which entries it wants the kernel +to assign interrupts for. It is invalid to fill in two entries with the +same number. + +4.3.1 pci_enable_msix + +int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) + +Calling this function asks the PCI subsystem to allocate 'nvec' MSIs. +The 'entries' argument is a pointer to an array of msix_entry structs +which should be at least 'nvec' entries in size. On success, the +function will return 0 and the device will have been switched into +MSI-X interrupt mode. The 'vector' elements in each entry will have +been filled in with the interrupt number. The driver should then call +request_irq() for each 'vector' that it decides to use. + +If this function returns a negative number, it indicates an error and +the driver should not attempt to allocate any more MSI-X interrupts for +this device. If it returns a positive number, it indicates the maximum +number of interrupt vectors that could have been allocated. See example +below. + +This function, in contrast with pci_enable_msi(), does not adjust +dev->irq. The device will not generate interrupts for this interrupt +number once MSI-X is enabled. The device driver is responsible for +keeping track of the interrupts assigned to the MSI-X vectors so it can +free them again later. + +Device drivers should normally call this function once per device +during the initialization phase. + +It is ideal if drivers can cope with a variable number of MSI-X interrupts, +there are many reasons why the platform may not be able to provide the +exact number a driver asks for. + +A request loop to achieve that might look like: + +static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec) +{ + while (nvec >= FOO_DRIVER_MINIMUM_NVEC) { + rc = pci_enable_msix(adapter->pdev, + adapter->msix_entries, nvec); + if (rc > 0) + nvec = rc; + else + return rc; + } + + return -ENOSPC; +} + +4.3.2 pci_disable_msix void pci_disable_msix(struct pci_dev *dev) -This API should always be used to undo the effect of pci_enable_msix() -when a device driver is unloading. Note that a device driver should -always call free_irq() on all MSI-X vectors it has done request_irq() -on before calling this API. Failure to do so results in a BUG_ON() and -a device will be left with MSI-X enabled and leaks its vectors. - -5.3.4 MSI-X mode vs. legacy mode diagram - -The below diagram shows the events which switch the interrupt -mode on the MSI-X capable device function between MSI-X mode and -PIN-IRQ assertion mode (legacy). - - ------------ pci_enable_msix(,,n) ------------------------ - | | <=============== | | - | MSI-X MODE | | PIN-IRQ ASSERTION MODE | - | | ===============> | | - ------------ pci_disable_msix ------------------------ - -Figure 2. MSI-X Mode vs. Legacy Mode - -In Figure 2, a device operates by default in legacy mode. A -successful MSI-X request (using pci_enable_msix()) switches a -device's interrupt mode to MSI-X mode. A pre-assigned IOAPIC vector -stored in dev->irq will be saved by the PCI subsystem; however, -unlike MSI mode, the PCI subsystem will not replace dev->irq with -assigned MSI-X vector because the PCI subsystem already writes the 1:1 -vector-to-entry mapping into the field 'vector' of each element -specified in second argument. - -To return back to its default mode, a device driver should always call -pci_disable_msix() to undo the effect of pci_enable_msix(). Note that -a device driver should always call free_irq() on all MSI-X vectors it -has done request_irq() on before calling pci_disable_msix(). Failure -to do so results in a BUG_ON() and a device will be left with MSI-X -enabled and leaks its vectors. Otherwise, the PCI subsystem switches a -device function's interrupt mode from MSI-X mode to legacy mode and -marks all allocated MSI-X vectors as unused. - -Once being marked as unused, there is no guarantee that the PCI -subsystem will reserve these MSI-X vectors for a device. Depending on -the availability of current PCI vector resources and the number of -MSI/MSI-X requests from other drivers, these MSI-X vectors may be -re-assigned. - -For the case where the PCI subsystem re-assigned these MSI-X vectors -to other drivers, a request to switch back to MSI-X mode may result -being assigned with another set of MSI-X vectors or a failure if no -more vectors are available. - -5.4 Handling function implementing both MSI and MSI-X capabilities - -For the case where a function implements both MSI and MSI-X -capabilities, the PCI subsystem enables a device to run either in MSI -mode or MSI-X mode but not both. A device driver determines whether it -wants MSI or MSI-X enabled on its hardware device. Once a device -driver requests for MSI, for example, it is prohibited from requesting -MSI-X; in other words, a device driver is not permitted to ping-pong -between MSI mod MSI-X mode during a run-time. - -5.5 Hardware requirements for MSI/MSI-X support - -MSI/MSI-X support requires support from both system hardware and -individual hardware device functions. - -5.5.1 Required x86 hardware support - -Since the target of MSI address is the local APIC CPU, enabling -MSI/MSI-X support in the Linux kernel is dependent on whether existing -system hardware supports local APIC. Users should verify that their -system supports local APIC operation by testing that it runs when -CONFIG_X86_LOCAL_APIC=y. - -In SMP environment, CONFIG_X86_LOCAL_APIC is automatically set; -however, in UP environment, users must manually set -CONFIG_X86_LOCAL_APIC. Once CONFIG_X86_LOCAL_APIC=y, setting -CONFIG_PCI_MSI enables the VECTOR based scheme and the option for -MSI-capable device drivers to selectively enable MSI/MSI-X. - -Note that CONFIG_X86_IO_APIC setting is irrelevant because MSI/MSI-X -vector is allocated new during runtime and MSI/MSI-X support does not -depend on BIOS support. This key independency enables MSI/MSI-X -support on future IOxAPIC free platforms. - -5.5.2 Device hardware support - -The hardware device function supports MSI by indicating the -MSI/MSI-X capability structure on its PCI capability list. By -default, this capability structure will not be initialized by -the kernel to enable MSI during the system boot. In other words, -the device function is running on its default pin assertion mode. -Note that in many cases the hardware supporting MSI have bugs, -which may result in system hangs. The software driver of specific -MSI-capable hardware is responsible for deciding whether to call -pci_enable_msi or not. A return of zero indicates the kernel -successfully initialized the MSI/MSI-X capability structure of the -device function. The device function is now running on MSI/MSI-X mode. - -5.6 How to tell whether MSI/MSI-X is enabled on device function - -At the driver level, a return of zero from the function call of -pci_enable_msi()/pci_enable_msix() indicates to a device driver that -its device function is initialized successfully and ready to run in -MSI/MSI-X mode. - -At the user level, users can use the command 'cat /proc/interrupts' -to display the vectors allocated for devices and their interrupt -MSI/MSI-X modes ("PCI-MSI"/"PCI-MSI-X"). Below shows MSI mode is -enabled on a SCSI Adaptec 39320D Ultra320 controller. - - CPU0 CPU1 - 0: 324639 0 IO-APIC-edge timer - 1: 1186 0 IO-APIC-edge i8042 - 2: 0 0 XT-PIC cascade - 12: 2797 0 IO-APIC-edge i8042 - 14: 6543 0 IO-APIC-edge ide0 - 15: 1 0 IO-APIC-edge ide1 -169: 0 0 IO-APIC-level uhci-hcd -185: 0 0 IO-APIC-level uhci-hcd -193: 138 10 PCI-MSI aic79xx -201: 30 0 PCI-MSI aic79xx -225: 30 0 IO-APIC-level aic7xxx -233: 30 0 IO-APIC-level aic7xxx -NMI: 0 0 -LOC: 324553 325068 -ERR: 0 -MIS: 0 - -6. MSI quirks - -Several PCI chipsets or devices are known to not support MSI. -The PCI stack provides 3 possible levels of MSI disabling: -* on a single device -* on all devices behind a specific bridge -* globally - -6.1. Disabling MSI on a single device - -Under some circumstances it might be required to disable MSI on a -single device. This may be achieved by either not calling pci_enable_msi() -or all, or setting the pci_dev->no_msi flag before (most of the time -in a quirk). - -6.2. Disabling MSI below a bridge - -The vast majority of MSI quirks are required by PCI bridges not -being able to route MSI between busses. In this case, MSI have to be -disabled on all devices behind this bridge. It is achieves by setting -the PCI_BUS_FLAGS_NO_MSI flag in the pci_bus->bus_flags of the bridge -subordinate bus. There is no need to set the same flag on bridges that -are below the broken bridge. When pci_enable_msi() is called to enable -MSI on a device, pci_msi_supported() takes care of checking the NO_MSI -flag in all parent busses of the device. - -Some bridges actually support dynamic MSI support enabling/disabling -by changing some bits in their PCI configuration space (especially -the Hypertransport chipsets such as the nVidia nForce and Serverworks -HT2000). It may then be required to update the NO_MSI flag on the -corresponding devices in the sysfs hierarchy. To enable MSI support -on device "0000:00:0e", do: - - echo 1 > /sys/bus/pci/devices/0000:00:0e/msi_bus - -To disable MSI support, echo 0 instead of 1. Note that it should be -used with caution since changing this value might break interrupts. - -6.3. Disabling MSI globally - -Some extreme cases may require to disable MSI globally on the system. -For now, the only known case is a Serverworks PCI-X chipsets (MSI are -not supported on several busses that are not all connected to the -chipset in the Linux PCI hierarchy). In the vast majority of other -cases, disabling only behind a specific bridge is enough. - -For debugging purpose, the user may also pass pci=nomsi on the kernel -command-line to explicitly disable MSI globally. But, once the appro- -priate quirks are added to the kernel, this option should not be -required anymore. - -6.4. Finding why MSI cannot be enabled on a device - -Assuming that MSI are not enabled on a device, you should look at -dmesg to find messages that quirks may output when disabling MSI -on some devices, some bridges or even globally. -Then, lspci -t gives the list of bridges above a device. Reading -/sys/bus/pci/devices/0000:00:0e/msi_bus will tell you whether MSI -are enabled (1) or disabled (0). In 0 is found in a single bridge -msi_bus file above the device, MSI cannot be enabled. - -7. FAQ - -Q1. Are there any limitations on using the MSI? - -A1. If the PCI device supports MSI and conforms to the -specification and the platform supports the APIC local bus, -then using MSI should work. - -Q2. Will it work on all the Pentium processors (P3, P4, Xeon, -AMD processors)? In P3 IPI's are transmitted on the APIC local -bus and in P4 and Xeon they are transmitted on the system -bus. Are there any implications with this? - -A2. MSI support enables a PCI device sending an inbound -memory write (0xfeexxxxx as target address) on its PCI bus -directly to the FSB. Since the message address has a -redirection hint bit cleared, it should work. - -Q3. The target address 0xfeexxxxx will be translated by the -Host Bridge into an interrupt message. Are there any -limitations on the chipsets such as Intel 8xx, Intel e7xxx, -or VIA? - -A3. If these chipsets support an inbound memory write with -target address set as 0xfeexxxxx, as conformed to PCI -specification 2.3 or latest, then it should work. - -Q4. From the driver point of view, if the MSI is lost because -of errors occurring during inbound memory write, then it may -wait forever. Is there a mechanism for it to recover? - -A4. Since the target of the transaction is an inbound memory -write, all transaction termination conditions (Retry, -Master-Abort, Target-Abort, or normal completion) are -supported. A device sending an MSI must abide by all the PCI -rules and conditions regarding that inbound memory write. So, -if a retry is signaled it must retry, etc... We believe that -the recommendation for Abort is also a retry (refer to PCI -specification 2.3 or latest). +This API should be used to undo the effect of pci_enable_msix(). It frees +the previously allocated message signaled interrupts. The interrupts may +subsequently be assigned to another device, so drivers should not cache +the value of the 'vector' elements over a call to pci_disable_msix(). + +A device driver must always call free_irq() on the interrupt(s) +for which it has called request_irq() before calling this function. +Failure to do so will result in a BUG_ON(), the device will be left with +MSI enabled and will leak its vector. + +4.3.3 The MSI-X Table + +The MSI-X capability specifies a BAR and offset within that BAR for the +MSI-X Table. This address is mapped by the PCI subsystem, and should not +be accessed directly by the device driver. If the driver wishes to +mask or unmask an interrupt, it should call disable_irq() / enable_irq(). + +4.4 Handling devices implementing both MSI and MSI-X capabilities + +If a device implements both MSI and MSI-X capabilities, it can +run in either MSI mode or MSI-X mode but not both simultaneously. +This is a requirement of the PCI spec, and it is enforced by the +PCI layer. Calling pci_enable_msi() when MSI-X is already enabled or +pci_enable_msix() when MSI is already enabled will result in an error. +If a device driver wishes to switch between MSI and MSI-X at runtime, +it must first quiesce the device, then switch it back to pin-interrupt +mode, before calling pci_enable_msi() or pci_enable_msix() and resuming +operation. This is not expected to be a common operation but may be +useful for debugging or testing during development. + +4.5 Considerations when using MSIs + +4.5.1 Choosing between MSI-X and MSI + +If your device supports both MSI-X and MSI capabilities, you should use +the MSI-X facilities in preference to the MSI facilities. As mentioned +above, MSI-X supports any number of interrupts between 1 and 2048. +In constrast, MSI is restricted to a maximum of 32 interrupts (and +must be a power of two). In addition, the MSI interrupt vectors must +be allocated consecutively, so the system may not be able to allocate +as many vectors for MSI as it could for MSI-X. On some platforms, MSI +interrupts must all be targetted at the same set of CPUs whereas MSI-X +interrupts can all be targetted at different CPUs. + +4.5.2 Spinlocks + +Most device drivers have a per-device spinlock which is taken in the +interrupt handler. With pin-based interrupts or a single MSI, it is not +necessary to disable interrupts (Linux guarantees the same interrupt will +not be re-entered). If a device uses multiple interrupts, the driver +must disable interrupts while the lock is held. If the device sends +a different interrupt, the driver will deadlock trying to recursively +acquire the spinlock. + +There are two solutions. The first is to take the lock with +spin_lock_irqsave() or spin_lock_irq() (see +Documentation/DocBook/kernel-locking). The second is to specify +IRQF_DISABLED to request_irq() so that the kernel runs the entire +interrupt routine with interrupts disabled. + +If your MSI interrupt routine does not hold the lock for the whole time +it is running, the first solution may be best. The second solution is +normally preferred as it avoids making two transitions from interrupt +disabled to enabled and back again. + +4.6 How to tell whether MSI/MSI-X is enabled on a device + +Using 'lspci -v' (as root) may show some devices with "MSI", "Message +Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities +has an 'Enable' flag which will be followed with either "+" (enabled) +or "-" (disabled). + + +5. MSI quirks + +Several PCI chipsets or devices are known not to support MSIs. +The PCI stack provides three ways to disable MSIs: + +1. globally +2. on all devices behind a specific bridge +3. on a single device + +5.1. Disabling MSIs globally + +Some host chipsets simply don't support MSIs properly. If we're +lucky, the manufacturer knows this and has indicated it in the ACPI +FADT table. In this case, Linux will automatically disable MSIs. +Some boards don't include this information in the table and so we have +to detect them ourselves. The complete list of these is found near the +quirk_disable_all_msi() function in drivers/pci/quirks.c. + +If you have a board which has problems with MSIs, you can pass pci=nomsi +on the kernel command line to disable MSIs on all devices. It would be +in your best interests to report the problem to linux-pci@vger.kernel.org +including a full 'lspci -v' so we can add the quirks to the kernel. + +5.2. Disabling MSIs below a bridge + +Some PCI bridges are not able to route MSIs between busses properly. +In this case, MSIs must be disabled on all devices behind the bridge. + +Some bridges allow you to enable MSIs by changing some bits in their +PCI configuration space (especially the Hypertransport chipsets such +as the nVidia nForce and Serverworks HT2000). As with host chipsets, +Linux mostly knows about them and automatically enables MSIs if it can. +If you have a bridge which Linux doesn't yet know about, you can enable +MSIs in configuration space using whatever method you know works, then +enable MSIs on that bridge by doing: + + echo 1 > /sys/bus/pci/devices/$bridge/msi_bus + +where $bridge is the PCI address of the bridge you've enabled (eg +0000:00:0e.0). + +To disable MSIs, echo 0 instead of 1. Changing this value should be +done with caution as it can break interrupt handling for all devices +below this bridge. + +Again, please notify linux-pci@vger.kernel.org of any bridges that need +special handling. + +5.3. Disabling MSIs on a single device + +Some devices are known to have faulty MSI implementations. Usually this +is handled in the individual device driver but occasionally it's necessary +to handle this with a quirk. Some drivers have an option to disable use +of MSI. While this is a convenient workaround for the driver author, +it is not good practise, and should not be emulated. + +5.4. Finding why MSIs are disabled on a device + +From the above three sections, you can see that there are many reasons +why MSIs may not be enabled for a given device. Your first step should +be to examine your dmesg carefully to determine whether MSIs are enabled +for your machine. You should also check your .config to be sure you +have enabled CONFIG_PCI_MSI. + +Then, 'lspci -t' gives the list of bridges above a device. Reading +/sys/bus/pci/devices/*/msi_bus will tell you whether MSI are enabled (1) +or disabled (0). If 0 is found in any of the msi_bus files belonging +to bridges between the PCI root and the device, MSIs are disabled. + +It is also worth checking the device driver to see whether it supports MSIs. +For example, it may contain calls to pci_enable_msi(), pci_enable_msix() or +pci_enable_msi_block(). diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt new file mode 100644 index 000000000000..fc73ef5d65b8 --- /dev/null +++ b/Documentation/PCI/pci-iov-howto.txt @@ -0,0 +1,99 @@ + PCI Express I/O Virtualization Howto + Copyright (C) 2009 Intel Corporation + Yu Zhao <yu.zhao@intel.com> + + +1. Overview + +1.1 What is SR-IOV + +Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended +capability which makes one physical device appear as multiple virtual +devices. The physical device is referred to as Physical Function (PF) +while the virtual devices are referred to as Virtual Functions (VF). +Allocation of the VF can be dynamically controlled by the PF via +registers encapsulated in the capability. By default, this feature is +not enabled and the PF behaves as traditional PCIe device. Once it's +turned on, each VF's PCI configuration space can be accessed by its own +Bus, Device and Function Number (Routing ID). And each VF also has PCI +Memory Space, which is used to map its register set. VF device driver +operates on the register set so it can be functional and appear as a +real existing PCI device. + +2. User Guide + +2.1 How can I enable SR-IOV capability + +The device driver (PF driver) will control the enabling and disabling +of the capability via API provided by SR-IOV core. If the hardware +has SR-IOV capability, loading its PF driver would enable it and all +VFs associated with the PF. + +2.2 How can I use the Virtual Functions + +The VF is treated as hot-plugged PCI devices in the kernel, so they +should be able to work in the same way as real PCI devices. The VF +requires device driver that is same as a normal PCI device's. + +3. Developer Guide + +3.1 SR-IOV API + +To enable SR-IOV capability: + int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); + 'nr_virtfn' is number of VFs to be enabled. + +To disable SR-IOV capability: + void pci_disable_sriov(struct pci_dev *dev); + +To notify SR-IOV core of Virtual Function Migration: + irqreturn_t pci_sriov_migration(struct pci_dev *dev); + +3.2 Usage example + +Following piece of code illustrates the usage of the SR-IOV API. + +static int __devinit dev_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + pci_enable_sriov(dev, NR_VIRTFN); + + ... + + return 0; +} + +static void __devexit dev_remove(struct pci_dev *dev) +{ + pci_disable_sriov(dev); + + ... +} + +static int dev_suspend(struct pci_dev *dev, pm_message_t state) +{ + ... + + return 0; +} + +static int dev_resume(struct pci_dev *dev) +{ + ... + + return 0; +} + +static void dev_shutdown(struct pci_dev *dev) +{ + ... +} + +static struct pci_driver dev_driver = { + .name = "SR-IOV Physical Function driver", + .id_table = dev_id_table, + .probe = dev_probe, + .remove = __devexit_p(dev_remove), + .suspend = dev_suspend, + .resume = dev_resume, + .shutdown = dev_shutdown, +}; diff --git a/Documentation/fb/00-INDEX b/Documentation/fb/00-INDEX index caabbd395e61..a618fd99c9f0 100644 --- a/Documentation/fb/00-INDEX +++ b/Documentation/fb/00-INDEX @@ -11,8 +11,6 @@ aty128fb.txt - info on the ATI Rage128 frame buffer driver. cirrusfb.txt - info on the driver for Cirrus Logic chipsets. -cyblafb/ - - directory with documentation files related to the cyblafb driver. deferred_io.txt - an introduction to deferred IO. fbcon.txt diff --git a/Documentation/fb/cyblafb/bugs b/Documentation/fb/cyblafb/bugs deleted file mode 100644 index 9443a6d72cdd..000000000000 --- a/Documentation/fb/cyblafb/bugs +++ /dev/null @@ -1,13 +0,0 @@ -Bugs -==== - -I currently don't know of any bug. Please do send reports to: - - linux-fbdev-devel@lists.sourceforge.net - - Knut_Petersen@t-online.de. - - -Untested features -================= - -All LCD stuff is untested. If it worked in tridentfb, it should work in -cyblafb. Please test and report the results to Knut_Petersen@t-online.de. diff --git a/Documentation/fb/cyblafb/credits b/Documentation/fb/cyblafb/credits deleted file mode 100644 index 0eb3b443dc2b..000000000000 --- a/Documentation/fb/cyblafb/credits +++ /dev/null @@ -1,7 +0,0 @@ -Thanks to -========= - * Alan Hourihane, for writing the X trident driver - * Jani Monoses, for writing the tridentfb driver - * Antonino A. Daplas, for review of the first published - version of cyblafb and some code - * Jochen Hein, for testing and a helpfull bug report diff --git a/Documentation/fb/cyblafb/documentation b/Documentation/fb/cyblafb/documentation deleted file mode 100644 index bb1aac048425..000000000000 --- a/Documentation/fb/cyblafb/documentation +++ /dev/null @@ -1,17 +0,0 @@ -Available Documentation -======================= - -Apollo PLE 133 Chipset VT8601A North Bridge Datasheet, Rev. 1.82, October 22, -2001, available from VIA: - - http://www.viavpsd.com/product/6/15/DS8601A182.pdf - -The datasheet is incomplete, some registers that need to be programmed are not -explained at all and important bits are listed as "reserved". But you really -need the datasheet to understand the code. "p. xxx" comments refer to page -numbers of this document. - -XFree/XOrg drivers are available and of good quality, looking at the code -there is a good idea if the datasheet does not provide enough information -or if the datasheet seems to be wrong. - diff --git a/Documentation/fb/cyblafb/fb.modes b/Documentation/fb/cyblafb/fb.modes deleted file mode 100644 index fe0e5223ba86..000000000000 --- a/Documentation/fb/cyblafb/fb.modes +++ /dev/null @@ -1,154 +0,0 @@ -# -# Sample fb.modes file -# -# Provides an incomplete list of working modes for -# the cyberblade/i1 graphics core. -# -# The value 4294967256 is used instead of -40. Of course, -40 is not -# a really reasonable value, but chip design does not always follow -# logic. Believe me, it's ok, and it's the way the BIOS does it. -# -# fbset requires 4294967256 in fb.modes and -40 as an argument to -# the -t parameter. That's also not too reasonable, and it might change -# in the future or might even be differt for your current version. -# - -mode "640x480-50" - geometry 640 480 2048 4096 8 - timings 47619 4294967256 24 17 0 216 3 -endmode - -mode "640x480-60" - geometry 640 480 2048 4096 8 - timings 39682 4294967256 24 17 0 216 3 -endmode - -mode "640x480-70" - geometry 640 480 2048 4096 8 - timings 34013 4294967256 24 17 0 216 3 -endmode - -mode "640x480-72" - geometry 640 480 2048 4096 8 - timings 33068 4294967256 24 17 0 216 3 -endmode - -mode "640x480-75" - geometry 640 480 2048 4096 8 - timings 31746 4294967256 24 17 0 216 3 -endmode - -mode "640x480-80" - geometry 640 480 2048 4096 8 - timings 29761 4294967256 24 17 0 216 3 -endmode - -mode "640x480-85" - geometry 640 480 2048 4096 8 - timings 28011 4294967256 24 17 0 216 3 -endmode - -mode "800x600-50" - geometry 800 600 2048 4096 8 - timings 30303 96 24 14 0 136 11 -endmode - -mode "800x600-60" - geometry 800 600 2048 4096 8 - timings 25252 96 24 14 0 136 11 -endmode - -mode "800x600-70" - geometry 800 600 2048 4096 8 - timings 21645 96 24 14 0 136 11 -endmode - -mode "800x600-72" - geometry 800 600 2048 4096 8 - timings 21043 96 24 14 0 136 11 -endmode - -mode "800x600-75" - geometry 800 600 2048 4096 8 - timings 20202 96 24 14 0 136 11 -endmode - -mode "800x600-80" - geometry 800 600 2048 4096 8 - timings 18939 96 24 14 0 136 11 -endmode - -mode "800x600-85" - geometry 800 600 2048 4096 8 - timings 17825 96 24 14 0 136 11 -endmode - -mode "1024x768-50" - geometry 1024 768 2048 4096 8 - timings 19054 144 24 29 0 120 3 -endmode - -mode "1024x768-60" - geometry 1024 768 2048 4096 8 - timings 15880 144 24 29 0 120 3 -endmode - -mode "1024x768-70" - geometry 1024 768 2048 4096 8 - timings 13610 144 24 29 0 120 3 -endmode - -mode "1024x768-72" - geometry 1024 768 2048 4096 8 - timings 13232 144 24 29 0 120 3 -endmode - -mode "1024x768-75" - geometry 1024 768 2048 4096 8 - timings 12703 144 24 29 0 120 3 -endmode - -mode "1024x768-80" - geometry 1024 768 2048 4096 8 - timings 11910 144 24 29 0 120 3 -endmode - -mode "1024x768-85" - geometry 1024 768 2048 4096 8 - timings 11209 144 24 29 0 120 3 -endmode - -mode "1280x1024-50" - geometry 1280 1024 2048 4096 8 - timings 11114 232 16 39 0 160 3 -endmode - -mode "1280x1024-60" - geometry 1280 1024 2048 4096 8 - timings 9262 232 16 39 0 160 3 -endmode - -mode "1280x1024-70" - geometry 1280 1024 2048 4096 8 - timings 7939 232 16 39 0 160 3 -endmode - -mode "1280x1024-72" - geometry 1280 1024 2048 4096 8 - timings 7719 232 16 39 0 160 3 -endmode - -mode "1280x1024-75" - geometry 1280 1024 2048 4096 8 - timings 7410 232 16 39 0 160 3 -endmode - -mode "1280x1024-80" - geometry 1280 1024 2048 4096 8 - timings 6946 232 16 39 0 160 3 -endmode - -mode "1280x1024-85" - geometry 1280 1024 2048 4096 8 - timings 6538 232 16 39 0 160 3 -endmode diff --git a/Documentation/fb/cyblafb/performance b/Documentation/fb/cyblafb/performance deleted file mode 100644 index 8d15d5dfc6b3..000000000000 --- a/Documentation/fb/cyblafb/performance +++ /dev/null @@ -1,79 +0,0 @@ -Speed -===== - -CyBlaFB is much faster than tridentfb and vesafb. Compare the performance data -for mode 1280x1024-[8,16,32]@61 Hz. - -Test 1: Cat a file with 2000 lines of 0 characters. -Test 2: Cat a file with 2000 lines of 80 characters. -Test 3: Cat a file with 2000 lines of 160 characters. - -All values show system time use in seconds, kernel 2.6.12 was used for -the measurements. 2.6.13 is a bit slower, 2.6.14 hopefully will include a -patch that speeds up kernel bitblitting a lot ( > 20%). - -+-----------+-----------------------------------------------------+ -| | not accelerated | -| TRIDENTFB +-----------------+-----------------+-----------------+ -| of 2.6.12 | 8 bpp | 16 bpp | 32 bpp | -| | noypan | ypan | noypan | ypan | noypan | ypan | -+-----------+--------+--------+--------+--------+--------+--------+ -| Test 1 | 4.31 | 4.33 | 6.05 | 12.81 | ---- | ---- | -| Test 2 | 67.94 | 5.44 | 123.16 | 14.79 | ---- | ---- | -| Test 3 | 131.36 | 6.55 | 240.12 | 16.76 | ---- | ---- | -+-----------+--------+--------+--------+--------+--------+--------+ -| Comments | | | completely bro- | -| | | | ken, monitor | -| | | | switches off | -+-----------+-----------------+-----------------+-----------------+ - - -+-----------+-----------------------------------------------------+ -| | accelerated | -| TRIDENTFB +-----------------+-----------------+-----------------+ -| of 2.6.12 | 8 bpp | 16 bpp | 32 bpp | -| | noypan | ypan | noypan | ypan | noypan | ypan | -+-----------+--------+--------+--------+--------+--------+--------+ -| Test 1 | ---- | ---- | 20.62 | 1.22 | ---- | ---- | -| Test 2 | ---- | ---- | 22.61 | 3.19 | ---- | ---- | -| Test 3 | ---- | ---- | 24.59 | 5.16 | ---- | ---- | -+-----------+--------+--------+--------+--------+--------+--------+ -| Comments | broken, writing | broken, ok only | completely bro- | -| | to wrong places | if bgcolor is | ken, monitor | -| | on screen + bug | black, bug in | switches off | -| | in fillrect() | fillrect() | | -+-----------+-----------------+-----------------+-----------------+ - - -+-----------+-----------------------------------------------------+ -| | not accelerated | -| VESAFB +-----------------+-----------------+-----------------+ -| of 2.6.12 | 8 bpp | 16 bpp | 32 bpp | -| | noypan | ypan | noypan | ypan | noypan | ypan | -+-----------+--------+--------+--------+--------+--------+--------+ -| Test 1 | 4.26 | 3.76 | 5.99 | 7.23 | ---- | ---- | -| Test 2 | 65.65 | 4.89 | 120.88 | 9.08 | ---- | ---- | -| Test 3 | 126.91 | 5.94 | 235.77 | 11.03 | ---- | ---- | -+-----------+--------+--------+--------+--------+--------+--------+ -| Comments | vga=0x307 | vga=0x31a | vga=0x31b not | -| | fh=80kHz | fh=80kHz | supported by | -| | fv=75kHz | fv=75kHz | video BIOS and | -| | | | hardware | -+-----------+-----------------+-----------------+-----------------+ - - -+-----------+-----------------------------------------------------+ -| | accelerated | -| CYBLAFB +-----------------+-----------------+-----------------+ -| | 8 bpp | 16 bpp | 32 bpp | -| | noypan | ypan | noypan | ypan | noypan | ypan | -+-----------+--------+--------+--------+--------+--------+--------+ -| Test 1 | 8.02 | 0.23 | 19.04 | 0.61 | 57.12 | 2.74 | -| Test 2 | 8.38 | 0.55 | 19.39 | 0.92 | 57.54 | 3.13 | -| Test 3 | 8.73 | 0.86 | 19.74 | 1.24 | 57.95 | 3.51 | -+-----------+--------+--------+--------+--------+--------+--------+ -| Comments | | | | -| | | | | -| | | | | -| | | | | -+-----------+-----------------+-----------------+-----------------+ diff --git a/Documentation/fb/cyblafb/todo b/Documentation/fb/cyblafb/todo deleted file mode 100644 index c5f6d0eae545..000000000000 --- a/Documentation/fb/cyblafb/todo +++ /dev/null @@ -1,31 +0,0 @@ -TODO / Missing features -======================= - -Verify LCD stuff "stretch" and "center" options are - completely untested ... this code needs to be - verified. As I don't have access to such - hardware, please contact me if you are - willing run some tests. - -Interlaced video modes The reason that interleaved - modes are disabled is that I do not know - the meaning of the vertical interlace - parameter. Also the datasheet mentions a - bit d8 of a horizontal interlace parameter, - but nowhere the lower 8 bits. Please help - if you can. - -low-res double scan modes Who needs it? - -accelerated color blitting Who needs it? The console driver does use color - blitting for nothing but drawing the penguine, - everything else is done using color expanding - blitting of 1bpp character bitmaps. - -ioctls Who needs it? - -TV-out Will be done later. Use "vga= " at boot time - to set a suitable video mode. - -??? Feel free to contact me if you have any - feature requests diff --git a/Documentation/fb/cyblafb/usage b/Documentation/fb/cyblafb/usage deleted file mode 100644 index a39bb3d402a2..000000000000 --- a/Documentation/fb/cyblafb/usage +++ /dev/null @@ -1,217 +0,0 @@ -CyBlaFB is a framebuffer driver for the Cyberblade/i1 graphics core integrated -into the VIA Apollo PLE133 (aka vt8601) south bridge. It is developed and -tested using a VIA EPIA 5000 board. - -Cyblafb - compiled into the kernel or as a module? -================================================== - -You might compile cyblafb either as a module or compile it permanently into the -kernel. - -Unless you have a real reason to do so you should not compile both vesafb and -cyblafb permanently into the kernel. It's possible and it helps during the -developement cycle, but it's useless and will at least block some otherwise -usefull memory for ordinary users. - -Selecting Modes -=============== - - Startup Mode - ============ - - First of all, you might use the "vga=???" boot parameter as it is - documented in vesafb.txt and svga.txt. Cyblafb will detect the video - mode selected and will use the geometry and timings found by - inspecting the hardware registers. - - video=cyblafb vga=0x317 - - Alternatively you might use a combination of the mode, ref and bpp - parameters. If you compiled the driver into the kernel, add something - like this to the kernel command line: - - video=cyblafb:1280x1024,bpp=16,ref=50 ... - - If you compiled the driver as a module, the same mode would be - selected by the following command: - - modprobe cyblafb mode=1280x1024 bpp=16 ref=50 ... - - None of the modes possible to select as startup modes are affected by - the problems described at the end of the next subsection. - - For all startup modes cyblafb chooses a virtual x resolution of 2048, - the only exception is mode 1280x1024 in combination with 32 bpp. This - allows ywrap scrolling for all those modes if rotation is 0 or 2, and - also fast scrolling if rotation is 1 or 3. The default virtual y reso- - lution is 4096 for bpp == 8, 2048 for bpp==16 and 1024 for bpp == 32, - again with the only exception of 1280x1024 at 32 bpp. - - Please do set your video memory size to 8 Mb in the Bios setup. Other - values will work, but performace is decreased for a lot of modes. - - Mode changes using fbset - ======================== - - You might use fbset to change the video mode, see "man fbset". Cyblafb - generally does assume that you know what you are doing. But it does - some checks, especially those that are needed to prevent you from - damaging your hardware. - - - only 8, 16, 24 and 32 bpp video modes are accepted - - interlaced video modes are not accepted - - double scan video modes are not accepted - - if a flat panel is found, cyblafb does not allow you - to program a resolution higher than the physical - resolution of the flat panel monitor - - cyblafb does not allow vclk to exceed 230 MHz. As 32 bpp - and (currently) 24 bit modes use a doubled vclk internally, - the dotclock limit as seen by fbset is 115 MHz for those - modes and 230 MHz for 8 and 16 bpp modes. - - cyblafb will allow you to select very high resolutions as - long as the hardware can be programmed to these modes. The - documented limit 1600x1200 is not enforced, but don't expect - perfect signal quality. - - Any request that violates the rules given above will be either changed - to something the hardware supports or an error value will be returned. - - If you program a virtual y resolution higher than the hardware limit, - cyblafb will silently decrease that value to the highest possible - value. The same is true for a virtual x resolution that is not - supported by the hardware. Cyblafb tries to adapt vyres first because - vxres decides if ywrap scrolling is possible or not. - - Attempts to disable acceleration are ignored, I believe that this is - safe. - - Some video modes that should work do not work as expected. If you use - the standard fb.modes, fbset 640x480-60 will program that mode, but - you will see a vertical area, about two characters wide, with only - much darker characters than the other characters on the screen. - Cyblafb does allow that mode to be set, as it does not violate the - official specifications. It would need a lot of code to reliably sort - out all invalid modes, playing around with the margin values will - give a valid mode quickly. And if cyblafb would detect such an invalid - mode, should it silently alter the requested values or should it - report an error? Both options have some pros and cons. As stated - above, none of the startup modes are affected, and if you set - verbosity to 1 or higher, cyblafb will print the fbset command that - would be needed to program that mode using fbset. - - -Other Parameters -================ - - -crt don't autodetect, assume monitor connected to - standard VGA connector - -fp don't autodetect, assume flat panel display - connected to flat panel monitor interface - -nativex inform driver about native x resolution of - flat panel monitor connected to special - interface (should be autodetected) - -stretch stretch image to adapt low resolution modes to - higer resolutions of flat panel monitors - connected to special interface - -center center image to adapt low resolution modes to - higer resolutions of flat panel monitors - connected to special interface - -memsize use if autodetected memsize is wrong ... - should never be necessary - -nopcirr disable PCI read retry -nopciwr disable PCI write retry -nopcirb disable PCI read bursts -nopciwb disable PCI write bursts - -bpp bpp for specified modes - valid values: 8 || 16 || 24 || 32 - -ref refresh rate for specified mode - valid values: 50 <= ref <= 85 - -mode 640x480 or 800x600 or 1024x768 or 1280x1024 - if not specified, the startup mode will be detected - and used, so you might also use the vga=??? parameter - described in vesafb.txt. If you do not specify a mode, - bpp and ref parameters are ignored. - -verbosity 0 is the default, increase to at least 2 for every - bug report! - -Development hints -================= - -It's much faster do compile a module and to load the new version after -unloading the old module than to compile a new kernel and to reboot. So if you -try to work on cyblafb, it might be a good idea to use cyblafb as a module. -In real life, fast often means dangerous, and that's also the case here. If -you introduce a serious bug when cyblafb is compiled into the kernel, the -kernel will lock or oops with a high probability before the file system is -mounted, and the danger for your data is low. If you load a broken own version -of cyblafb on a running system, the danger for the integrity of the file -system is much higher as you might need a hard reset afterwards. Decide -yourself. - -Module unloading, the vfb method -================================ - -If you want to unload/reload cyblafb using the virtual framebuffer, you need -to enable vfb support in the kernel first. After that, load the modules as -shown below: - - modprobe vfb vfb_enable=1 - modprobe fbcon - modprobe cyblafb - fbset -fb /dev/fb1 1280x1024-60 -vyres 2662 - con2fb /dev/fb1 /dev/tty1 - ... - -If you now made some changes to cyblafb and want to reload it, you might do it -as show below: - - con2fb /dev/fb0 /dev/tty1 - ... - rmmod cyblafb - modprobe cyblafb - con2fb /dev/fb1 /dev/tty1 - ... - -Of course, you might choose another mode, and most certainly you also want to -map some other /dev/tty* to the real framebuffer device. You might also choose -to compile fbcon as a kernel module or place it permanently in the kernel. - -I do not know of any way to unload fbcon, and fbcon will prevent the -framebuffer device loaded first from unloading. [If there is a way, then -please add a description here!] - -Module unloading, the vesafb method -=================================== - -Configure the kernel: - - <*> Support for frame buffer devices - [*] VESA VGA graphics support - <M> Cyberblade/i1 support - -Add e.g. "video=vesafb:ypan vga=0x307" to the kernel parameters. The ypan -parameter is important, choose any vga parameter you like as long as it is -a graphics mode. - -After booting, load cyblafb without any mode and bpp parameter and assign -cyblafb to individual ttys using con2fb, e.g.: - - modprobe cyblafb - con2fb /dev/fb1 /dev/tty1 - -Unloading cyblafb works without problems after you assign vesafb to all -ttys again, e.g.: - - con2fb /dev/fb0 /dev/tty1 - rmmod cyblafb diff --git a/Documentation/fb/cyblafb/whatsnew b/Documentation/fb/cyblafb/whatsnew deleted file mode 100644 index 76c07a26e044..000000000000 --- a/Documentation/fb/cyblafb/whatsnew +++ /dev/null @@ -1,29 +0,0 @@ -0.62 -==== - - - the vesafb parameter has been removed as I decided to allow the - feature without any special parameter. - - - Cyblafb does not use the vga style of panning any longer, now the - "right view" register in the graphics engine IO space is used. Without - that change it was impossible to use all available memory, and without - access to all available memory it is impossible to ywrap. - - - The imageblit function now uses hardware acceleration for all font - widths. Hardware blitting across pixel column 2048 is broken in the - cyberblade/i1 graphics core, but we work around that hardware bug. - - - modes with vxres != xres are supported now. - - - ywrap scrolling is supported now and the default. This is a big - performance gain. - - - default video modes use vyres > yres and vxres > xres to allow - almost optimal scrolling speed for normal and rotated screens - - - some features mainly usefull for debugging the upper layers of the - framebuffer system have been added, have a look at the code - - - fixed: Oops after unloading cyblafb when reading /proc/io* - - - we work around some bugs of the higher framebuffer layers. diff --git a/Documentation/fb/cyblafb/whycyblafb b/Documentation/fb/cyblafb/whycyblafb deleted file mode 100644 index a123bc11e698..000000000000 --- a/Documentation/fb/cyblafb/whycyblafb +++ /dev/null @@ -1,85 +0,0 @@ -I tried the following framebuffer drivers: - - - TRIDENTFB is full of bugs. Acceleration is broken for Blade3D - graphics cores like the cyberblade/i1. It claims to support a great - number of devices, but documentation for most of these devices is - unfortunately not available. There is _no_ reason to use tridentfb - for cyberblade/i1 + CRT users. VESAFB is faster, and the one - advantage, mode switching, is broken in tridentfb. - - - VESAFB is used by many distributions as a standard. Vesafb does - not support mode switching. VESAFB is a bit faster than the working - configurations of TRIDENTFB, but it is still too slow, even if you - use ypan. - - - EPIAFB (you'll find it on sourceforge) supports the Cyberblade/i1 - graphics core, but it still has serious bugs and developement seems - to have stopped. This is the one driver with TV-out support. If you - do need this feature, try epiafb. - -None of these drivers was a real option for me. - -I believe that is unreasonable to change code that announces to support 20 -devices if I only have more or less sufficient documentation for exactly one -of these. The risk of breaking device foo while fixing device bar is too high. - -So I decided to start CyBlaFB as a stripped down tridentfb. - -All code specific to other Trident chips has been removed. After that there -were a lot of cosmetic changes to increase the readability of the code. All -register names were changed to those mnemonics used in the datasheet. Function -and macro names were changed if they hindered easy understanding of the code. - -After that I debugged the code and implemented some new features. I'll try to -give a little summary of the main changes: - - - calculation of vertical and horizontal timings was fixed - - - video signal quality has been improved dramatically - - - acceleration: - - - fillrect and copyarea were fixed and reenabled - - - color expanding imageblit was newly implemented, color - imageblit (only used to draw the penguine) still uses the - generic code. - - - init of the acceleration engine was improved and moved to a - place where it really works ... - - - sync function has a timeout now and tries to reset and - reinit the accel engine if necessary - - - fewer slow copyarea calls when doing ypan scrolling by using - undocumented bit d21 of screen start address stored in - CR2B[5]. BIOS does use it also, so this should be safe. - - - cyblafb rejects any attempt to set modes that would cause vclk - values above reasonable 230 MHz. 32bit modes use a clock - multiplicator of 2, so fbset does show the correct values for - pixclock but not for vclk in this case. The fbset limit is 115 MHz - for 32 bpp modes. - - - cyblafb rejects modes known to be broken or unimplemented (all - interlaced modes, all doublescan modes for now) - - - cyblafb now works independant of the video mode in effect at startup - time (tridentfb does not init all needed registers to reasonable - values) - - - switching between video modes does work reliably now - - - the first video mode now is the one selected on startup using the - vga=???? mechanism or any of - - 640x480, 800x600, 1024x768, 1280x1024 - - 8, 16, 24 or 32 bpp - - refresh between 50 Hz and 85 Hz, 1 Hz steps (1280x1024-32 - is limited to 63Hz) - - - pci retry and pci burst mode are settable (try to disable if you - experience latency problems) - - - built as a module cyblafb might be unloaded and reloaded using - the vfb module and con2vt or might be used together with vesafb - diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 5e02b83ac12b..d0f354670646 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -311,6 +311,18 @@ Who: Vlad Yasevich <vladislav.yasevich@hp.com> --------------------------- +What: Ability for non root users to shm_get hugetlb pages based on mlock + resource limits +When: 2.6.31 +Why: Non root users need to be part of /proc/sys/vm/hugetlb_shm_group or + have CAP_IPC_LOCK to be able to allocate shm segments backed by + huge pages. The mlock based rlimit check to allow shm hugetlb is + inconsistent with mmap based allocations. Hence it is being + deprecated. +Who: Ravikiran Thirumalai <kiran@scalex86.org> + +--------------------------- + What: CONFIG_THERMAL_HWMON When: January 2009 Why: This option was introduced just to allow older lm-sensors userspace @@ -380,3 +392,35 @@ Why: The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t) have been kept around for migration reasons. After more than two years it's time to remove them finally Who: Thomas Gleixner <tglx@linutronix.de> + +--------------------------- + +What: fakephp and associated sysfs files in /sys/bus/pci/slots/ +When: 2011 +Why: In 2.6.27, the semantics of /sys/bus/pci/slots was redefined to + represent a machine's physical PCI slots. The change in semantics + had userspace implications, as the hotplug core no longer allowed + drivers to create multiple sysfs files per physical slot (required + for multi-function devices, e.g.). fakephp was seen as a developer's + tool only, and its interface changed. Too late, we learned that + there were some users of the fakephp interface. + + In 2.6.30, the original fakephp interface was restored. At the same + time, the PCI core gained the ability that fakephp provided, namely + function-level hot-remove and hot-add. + + Since the PCI core now provides the same functionality, exposed in: + + /sys/bus/pci/rescan + /sys/bus/pci/devices/.../remove + /sys/bus/pci/devices/.../rescan + + there is no functional reason to maintain fakephp as well. + + We will keep the existing module so that 'modprobe fakephp' will + present the old /sys/bus/pci/slots/... interface for compatibility, + but users are urged to migrate their applications to the API above. + + After a reasonable transition period, we will remove the legacy + fakephp interface. +Who: Alex Chiang <achiang@hp.com> diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 4e78ce677843..76efe5b71d7d 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -505,7 +505,7 @@ prototypes: void (*open)(struct vm_area_struct*); void (*close)(struct vm_area_struct*); int (*fault)(struct vm_area_struct*, struct vm_fault *); - int (*page_mkwrite)(struct vm_area_struct *, struct page *); + int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *); int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); locking rules: diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index cec829bc7291..97882df04865 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -85,7 +85,7 @@ Note: More extensive information for getting started with ext4 can be * extent format more robust in face of on-disk corruption due to magics, * internal redundancy in tree * improved file allocation (multi-block alloc) -* fix 32000 subdirectory limit +* lift 32000 subdirectory limit imposed by i_links_count[1] * nsec timestamps for mtime, atime, ctime, create time * inode version field on disk (NFSv4, Lustre) * reduced e2fsck time via uninit_bg feature @@ -100,6 +100,9 @@ Note: More extensive information for getting started with ext4 can be * efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force the ordering) +[1] Filesystems with a block size of 1k may see a limit imposed by the +directory hash tree having a maximum depth of two. + 2.2 Candidate features for future inclusion * Online defrag (patches available but not well tested) @@ -180,8 +183,8 @@ commit=nrsec (*) Ext4 can be told to sync all its data and metadata performance. barrier=<0|1(*)> This enables/disables the use of write barriers in - the jbd code. barrier=0 disables, barrier=1 enables. - This also requires an IO stack which can support +barrier(*) the jbd code. barrier=0 disables, barrier=1 enables. +nobarrier This also requires an IO stack which can support barriers, and if jbd gets an error on a barrier write, it will disable again with a warning. Write barriers enforce proper on-disk ordering @@ -189,6 +192,9 @@ barrier=<0|1(*)> This enables/disables the use of write barriers in safe to use, at some performance penalty. If your disks are battery-backed in one way or another, disabling barriers may safely improve performance. + The mount options "barrier" and "nobarrier" can + also be used to enable or disable barriers, for + consistency with other ext4 mount options. inode_readahead=n This tuning parameter controls the maximum number of inode table blocks that ext4's inode @@ -310,6 +316,24 @@ journal_ioprio=prio The I/O priority (from 0 to 7, where 0 is the a slightly higher priority than the default I/O priority. +auto_da_alloc(*) Many broken applications don't use fsync() when +noauto_da_alloc replacing existing files via patterns such as + fd = open("foo.new")/write(fd,..)/close(fd)/ + rename("foo.new", "foo"), or worse yet, + fd = open("foo", O_TRUNC)/write(fd,..)/close(fd). + If auto_da_alloc is enabled, ext4 will detect + the replace-via-rename and replace-via-truncate + patterns and force that any delayed allocation + blocks are allocated such that at the next + journal commit, in the default data=ordered + mode, the data blocks of the new file are forced + to disk before the rename() operation is + commited. This provides roughly the same level + of guarantees as ext3, and avoids the + "zero-length" problem that can happen when a + system crashes before the delayed allocation + blocks are forced to disk. + Data Mode ========= There are 3 different data modes: diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 830bad7cce0f..efc4fd9f40ce 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -940,27 +940,6 @@ Table 1-10: Files in /proc/fs/ext4/<devname> File Content mb_groups details of multiblock allocator buddy cache of free blocks mb_history multiblock allocation history - stats controls whether the multiblock allocator should start - collecting statistics, which are shown during the unmount - group_prealloc the multiblock allocator will round up allocation - requests to a multiple of this tuning parameter if the - stripe size is not set in the ext4 superblock - max_to_scan The maximum number of extents the multiblock allocator - will search to find the best extent - min_to_scan The minimum number of extents the multiblock allocator - will search to find the best extent - order2_req Tuning parameter which controls the minimum size for - requests (as a power of 2) where the buddy cache is - used - stream_req Files which have fewer blocks than this tunable - parameter will have their blocks allocated out of a - block group specific preallocation pool, so that small - files are packed closely together. Each large file - will have its blocks allocated out of its own unique - preallocation pool. -inode_readahead Tuning parameter which controls the maximum number of - inode table blocks that ext4's inode table readahead - algorithm will pre-read into the buffer cache .............................................................................. diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt index 9f8740ca3f3b..26e4b8bc53ee 100644 --- a/Documentation/filesystems/sysfs-pci.txt +++ b/Documentation/filesystems/sysfs-pci.txt @@ -12,6 +12,7 @@ that support it. For example, a given bus might look like this: | |-- enable | |-- irq | |-- local_cpus + | |-- remove | |-- resource | |-- resource0 | |-- resource1 @@ -36,6 +37,7 @@ files, each with their own function. enable Whether the device is enabled (ascii, rw) irq IRQ number (ascii, ro) local_cpus nearby CPU mask (cpumask, ro) + remove remove device from kernel's list (ascii, wo) resource PCI resource host addresses (ascii, ro) resource0..N PCI resource N, if present (binary, mmap) resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap) @@ -46,6 +48,7 @@ files, each with their own function. ro - read only file rw - file is readable and writable + wo - write only file mmap - file is mmapable ascii - file contains ascii text binary - file contains binary data @@ -73,6 +76,13 @@ that the device must be enabled for a rom read to return data succesfully. In the event a driver is not bound to the device, it can be enabled using the 'enable' file, documented above. +The 'remove' file is used to remove the PCI device, by writing a non-zero +integer to the file. This does not involve any kind of hot-plug functionality, +e.g. powering off the device. The device is removed from the kernel's list of +PCI devices, the sysfs directory for it is removed, and the device will be +removed from any drivers attached to it. Removal of PCI root buses is +disallowed. + Accessing legacy resources through sysfs ---------------------------------------- diff --git a/Documentation/hwmon/lis3lv02d b/Documentation/hwmon/lis3lv02d index 287f8c902656..effe949a7282 100644 --- a/Documentation/hwmon/lis3lv02d +++ b/Documentation/hwmon/lis3lv02d @@ -1,11 +1,11 @@ Kernel driver lis3lv02d -================== +======================= Supported chips: * STMicroelectronics LIS3LV02DL and LIS3LV02DQ -Author: +Authors: Yan Burman <burman.yan@gmail.com> Eric Piel <eric.piel@tremplin-utc.net> @@ -15,7 +15,7 @@ Description This driver provides support for the accelerometer found in various HP laptops sporting the feature officially called "HP Mobile Data -Protection System 3D" or "HP 3D DriveGuard". It detect automatically +Protection System 3D" or "HP 3D DriveGuard". It detects automatically laptops with this sensor. Known models (for now the HP 2133, nc6420, nc2510, nc8510, nc84x0, nw9440 and nx9420) will have their axis automatically oriented on standard way (eg: you can directly play @@ -27,7 +27,7 @@ position - 3D position that the accelerometer reports. Format: "(x,y,z)" calibrate - read: values (x, y, z) that are used as the base for input class device operation. write: forces the base to be recalibrated with the current - position. + position. rate - reports the sampling rate of the accelerometer device in HZ This driver also provides an absolute input class device, allowing @@ -48,7 +48,7 @@ For better compatibility between the various laptops. The values reported by the accelerometer are converted into a "standard" organisation of the axes (aka "can play neverball out of the box"): * When the laptop is horizontal the position reported is about 0 for X and Y -and a positive value for Z + and a positive value for Z * If the left side is elevated, X increases (becomes positive) * If the front side (where the touchpad is) is elevated, Y decreases (becomes negative) @@ -59,3 +59,13 @@ email to the authors to add it to the database. When reporting a new laptop, please include the output of "dmidecode" plus the value of /sys/devices/platform/lis3lv02d/position in these four cases. +Q&A +--- + +Q: How do I safely simulate freefall? I have an HP "portable +workstation" which has about 3.5kg and a plastic case, so letting it +fall to the ground is out of question... + +A: The sensor is pretty sensitive, so your hands can do it. Lift it +into free space, follow the fall with your hands for like 10 +centimeters. That should be enough to trigger the detection. diff --git a/Documentation/hwmon/ltc4215 b/Documentation/hwmon/ltc4215 new file mode 100644 index 000000000000..2e6a21eb656c --- /dev/null +++ b/Documentation/hwmon/ltc4215 @@ -0,0 +1,50 @@ +Kernel driver ltc4215 +===================== + +Supported chips: + * Linear Technology LTC4215 + Prefix: 'ltc4215' + Addresses scanned: 0x44 + Datasheet: + http://www.linear.com/pc/downloadDocument.do?navId=H0,C1,C1003,C1006,C1163,P17572,D12697 + +Author: Ira W. Snyder <iws@ovro.caltech.edu> + + +Description +----------- + +The LTC4215 controller allows a board to be safely inserted and removed +from a live backplane. + + +Usage Notes +----------- + +This driver does not probe for LTC4215 devices, due to the fact that some +of the possible addresses are unfriendly to probing. You will need to use +the "force" parameter to tell the driver where to find the device. + +Example: the following will load the driver for an LTC4215 at address 0x44 +on I2C bus #0: +$ modprobe ltc4215 force=0,0x44 + + +Sysfs entries +------------- + +The LTC4215 has built-in limits for overvoltage, undervoltage, and +undercurrent warnings. This makes it very likely that the reference +circuit will be used. + +in1_input input voltage +in2_input output voltage + +in1_min_alarm input undervoltage alarm +in1_max_alarm input overvoltage alarm + +curr1_input current +curr1_max_alarm overcurrent alarm + +power1_input power usage +power1_alarm power bad alarm diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index aeedb89a307a..240257dd4238 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1695,6 +1695,8 @@ and is between 256 and 4096 characters. It is defined in the file See also Documentation/blockdev/paride.txt. pci=option[,option...] [PCI] various PCI subsystem options: + earlydump [X86] dump PCI config space before the kernel + changes anything off [X86] don't probe for the PCI bus bios [X86-32] force use of PCI BIOS, don't access the hardware directly. Use this if your machine @@ -1794,6 +1796,15 @@ and is between 256 and 4096 characters. It is defined in the file cbmemsize=nn[KMG] The fixed amount of bus space which is reserved for the CardBus bridge's memory window. The default value is 64 megabytes. + resource_alignment= + Format: + [<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...] + Specifies alignment and device to reassign + aligned memory resources. + If <order of align> is not specified, + PAGE_SIZE is used as alignment. + PCI-PCI bridge can be specified, if resource + windows need to be expanded. pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power Management. diff --git a/Documentation/misc-devices/isl29003 b/Documentation/misc-devices/isl29003 new file mode 100644 index 000000000000..c4ff5f38e010 --- /dev/null +++ b/Documentation/misc-devices/isl29003 @@ -0,0 +1,62 @@ +Kernel driver isl29003 +===================== + +Supported chips: +* Intersil ISL29003 +Prefix: 'isl29003' +Addresses scanned: none +Datasheet: +http://www.intersil.com/data/fn/fn7464.pdf + +Author: Daniel Mack <daniel@caiaq.de> + + +Description +----------- +The ISL29003 is an integrated light sensor with a 16-bit integrating type +ADC, I2C user programmable lux range select for optimized counts/lux, and +I2C multi-function control and monitoring capabilities. The internal ADC +provides 16-bit resolution while rejecting 50Hz and 60Hz flicker caused by +artificial light sources. + +The driver allows to set the lux range, the bit resolution, the operational +mode (see below) and the power state of device and can read the current lux +value, of course. + + +Detection +--------- + +The ISL29003 does not have an ID register which could be used to identify +it, so the detection routine will just try to read from the configured I2C +addess and consider the device to be present as soon as it ACKs the +transfer. + + +Sysfs entries +------------- + +range: + 0: 0 lux to 1000 lux (default) + 1: 0 lux to 4000 lux + 2: 0 lux to 16,000 lux + 3: 0 lux to 64,000 lux + +resolution: + 0: 2^16 cycles (default) + 1: 2^12 cycles + 2: 2^8 cycles + 3: 2^4 cycles + +mode: + 0: diode1's current (unsigned 16bit) (default) + 1: diode1's current (unsigned 16bit) + 2: difference between diodes (l1 - l2, signed 15bit) + +power_state: + 0: device is disabled (default) + 1: device is enabled + +lux (read only): + returns the value from the last sensor reading + diff --git a/Documentation/powerpc/dts-bindings/mmc-spi-slot.txt b/Documentation/powerpc/dts-bindings/mmc-spi-slot.txt new file mode 100644 index 000000000000..c39ac2891951 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/mmc-spi-slot.txt @@ -0,0 +1,23 @@ +MMC/SD/SDIO slot directly connected to a SPI bus + +Required properties: +- compatible : should be "mmc-spi-slot". +- reg : should specify SPI address (chip-select number). +- spi-max-frequency : maximum frequency for this device (Hz). +- voltage-ranges : two cells are required, first cell specifies minimum + slot voltage (mV), second cell specifies maximum slot voltage (mV). + Several ranges could be specified. +- gpios : (optional) may specify GPIOs in this order: Card-Detect GPIO, + Write-Protect GPIO. + +Example: + + mmc-slot@0 { + compatible = "fsl,mpc8323rdb-mmc-slot", + "mmc-spi-slot"; + reg = <0>; + gpios = <&qe_pio_d 14 1 + &qe_pio_d 15 0>; + voltage-ranges = <3300 3300>; + spi-max-frequency = <50000000>; + }; diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt index 9e592c718afb..afa2946892da 100644 --- a/Documentation/sysrq.txt +++ b/Documentation/sysrq.txt @@ -81,6 +81,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.: 'i' - Send a SIGKILL to all processes, except for init. +'j' - Forcibly "Just thaw it" - filesystems frozen by the FIFREEZE ioctl. + 'k' - Secure Access Key (SAK) Kills all programs on the current virtual console. NOTE: See important comments below in SAK section. @@ -160,6 +162,9 @@ t'E'rm and k'I'll are useful if you have some sort of runaway process you are unable to kill any other way, especially if it's spawning other processes. +"'J'ust thaw it" is useful if your system becomes unresponsive due to a frozen +(probably root) filesystem via the FIFREEZE ioctl. + * Sometimes SysRq seems to get 'stuck' after using it, what can I do? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That happens to me, also. I've found that tapping shift, alt, and control diff --git a/MAINTAINERS b/MAINTAINERS index 07e6fcdcfe65..faf43fc6153f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -357,6 +357,7 @@ S: Odd Fixes for 2.4; Maintained for 2.6. P: Ivan Kokshaysky M: ink@jurassic.park.msu.ru S: Maintained for 2.4; PCI support for 2.6. +L: linux-alpha@vger.kernel.org AMD GEODE CS5536 USB DEVICE CONTROLLER DRIVER P: Thomas Dahlmann @@ -2201,25 +2202,12 @@ L: linux-ide@vger.kernel.org T: quilt kernel.org/pub/linux/kernel/people/bart/pata-2.6/ S: Maintained -IDE/ATAPI CDROM DRIVER +IDE/ATAPI DRIVERS P: Borislav Petkov M: petkovbb@gmail.com L: linux-ide@vger.kernel.org S: Maintained -IDE/ATAPI FLOPPY DRIVERS -P: Paul Bristow -M: Paul Bristow <paul@paulbristow.net> -W: http://paulbristow.net/linux/idefloppy.html -L: linux-kernel@vger.kernel.org -S: Maintained - -IDE/ATAPI TAPE DRIVERS -P: Gadi Oxman -M: Gadi Oxman <gadio@netvision.net.il> -L: linux-kernel@vger.kernel.org -S: Maintained - IDLE-I7300 P: Andy Henroid M: andrew.d.henroid@intel.com diff --git a/arch/alpha/include/asm/machvec.h b/arch/alpha/include/asm/machvec.h index fea4ea75b79d..13cd42743810 100644 --- a/arch/alpha/include/asm/machvec.h +++ b/arch/alpha/include/asm/machvec.h @@ -80,7 +80,7 @@ struct alpha_machine_vector void (*update_irq_hw)(unsigned long, unsigned long, int); void (*ack_irq)(unsigned long); void (*device_interrupt)(unsigned long vector); - void (*machine_check)(u64 vector, u64 la); + void (*machine_check)(unsigned long vector, unsigned long la); void (*smp_callin)(void); void (*init_arch)(void); diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h index 2a14302c17a3..cb04eaa6ba33 100644 --- a/arch/alpha/include/asm/pci.h +++ b/arch/alpha/include/asm/pci.h @@ -273,4 +273,18 @@ struct pci_dev *alpha_gendev_to_pci(struct device *dev); extern struct pci_dev *isa_bridge; +extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, + size_t count); +extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, + size_t count); +extern int pci_mmap_legacy_page_range(struct pci_bus *bus, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state); +extern void pci_adjust_legacy_attr(struct pci_bus *bus, + enum pci_mmap_state mmap_type); +#define HAVE_PCI_LEGACY 1 + +extern int pci_create_resource_files(struct pci_dev *dev); +extern void pci_remove_resource_files(struct pci_dev *dev); + #endif /* __ALPHA_PCI_H */ diff --git a/arch/alpha/include/asm/system.h b/arch/alpha/include/asm/system.h index afe20fa58c99..5aa40cca4f23 100644 --- a/arch/alpha/include/asm/system.h +++ b/arch/alpha/include/asm/system.h @@ -309,518 +309,71 @@ extern int __min_ipl; #define tbia() __tbi(-2, /* no second argument */) /* - * Atomic exchange. - * Since it can be used to implement critical sections - * it must clobber "memory" (also for interrupts in UP). + * Atomic exchange routines. */ -static inline unsigned long -__xchg_u8(volatile char *m, unsigned long val) -{ - unsigned long ret, tmp, addr64; - - __asm__ __volatile__( - " andnot %4,7,%3\n" - " insbl %1,%4,%1\n" - "1: ldq_l %2,0(%3)\n" - " extbl %2,%4,%0\n" - " mskbl %2,%4,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%3)\n" - " beq %2,2f\n" -#ifdef CONFIG_SMP - " mb\n" -#endif - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) - : "r" ((long)m), "1" (val) : "memory"); - - return ret; -} - -static inline unsigned long -__xchg_u16(volatile short *m, unsigned long val) -{ - unsigned long ret, tmp, addr64; - - __asm__ __volatile__( - " andnot %4,7,%3\n" - " inswl %1,%4,%1\n" - "1: ldq_l %2,0(%3)\n" - " extwl %2,%4,%0\n" - " mskwl %2,%4,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%3)\n" - " beq %2,2f\n" -#ifdef CONFIG_SMP - " mb\n" -#endif - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) - : "r" ((long)m), "1" (val) : "memory"); - - return ret; -} - -static inline unsigned long -__xchg_u32(volatile int *m, unsigned long val) -{ - unsigned long dummy; - - __asm__ __volatile__( - "1: ldl_l %0,%4\n" - " bis $31,%3,%1\n" - " stl_c %1,%2\n" - " beq %1,2f\n" -#ifdef CONFIG_SMP - " mb\n" -#endif - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (val), "=&r" (dummy), "=m" (*m) - : "rI" (val), "m" (*m) : "memory"); - - return val; -} - -static inline unsigned long -__xchg_u64(volatile long *m, unsigned long val) -{ - unsigned long dummy; - - __asm__ __volatile__( - "1: ldq_l %0,%4\n" - " bis $31,%3,%1\n" - " stq_c %1,%2\n" - " beq %1,2f\n" -#ifdef CONFIG_SMP - " mb\n" -#endif - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (val), "=&r" (dummy), "=m" (*m) - : "rI" (val), "m" (*m) : "memory"); +#define __ASM__MB +#define ____xchg(type, args...) __xchg ## type ## _local(args) +#define ____cmpxchg(type, args...) __cmpxchg ## type ## _local(args) +#include <asm/xchg.h> - return val; -} - -/* This function doesn't exist, so you'll get a linker error - if something tries to do an invalid xchg(). */ -extern void __xchg_called_with_bad_pointer(void); - -#define __xchg(ptr, x, size) \ -({ \ - unsigned long __xchg__res; \ - volatile void *__xchg__ptr = (ptr); \ - switch (size) { \ - case 1: __xchg__res = __xchg_u8(__xchg__ptr, x); break; \ - case 2: __xchg__res = __xchg_u16(__xchg__ptr, x); break; \ - case 4: __xchg__res = __xchg_u32(__xchg__ptr, x); break; \ - case 8: __xchg__res = __xchg_u64(__xchg__ptr, x); break; \ - default: __xchg_called_with_bad_pointer(); __xchg__res = x; \ - } \ - __xchg__res; \ -}) - -#define xchg(ptr,x) \ - ({ \ - __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ +#define xchg_local(ptr,x) \ + ({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_local((ptr), (unsigned long)_x_, \ + sizeof(*(ptr))); \ }) -static inline unsigned long -__xchg_u8_local(volatile char *m, unsigned long val) -{ - unsigned long ret, tmp, addr64; - - __asm__ __volatile__( - " andnot %4,7,%3\n" - " insbl %1,%4,%1\n" - "1: ldq_l %2,0(%3)\n" - " extbl %2,%4,%0\n" - " mskbl %2,%4,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%3)\n" - " beq %2,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) - : "r" ((long)m), "1" (val) : "memory"); - - return ret; -} - -static inline unsigned long -__xchg_u16_local(volatile short *m, unsigned long val) -{ - unsigned long ret, tmp, addr64; - - __asm__ __volatile__( - " andnot %4,7,%3\n" - " inswl %1,%4,%1\n" - "1: ldq_l %2,0(%3)\n" - " extwl %2,%4,%0\n" - " mskwl %2,%4,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%3)\n" - " beq %2,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) - : "r" ((long)m), "1" (val) : "memory"); - - return ret; -} - -static inline unsigned long -__xchg_u32_local(volatile int *m, unsigned long val) -{ - unsigned long dummy; - - __asm__ __volatile__( - "1: ldl_l %0,%4\n" - " bis $31,%3,%1\n" - " stl_c %1,%2\n" - " beq %1,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (val), "=&r" (dummy), "=m" (*m) - : "rI" (val), "m" (*m) : "memory"); - - return val; -} - -static inline unsigned long -__xchg_u64_local(volatile long *m, unsigned long val) -{ - unsigned long dummy; - - __asm__ __volatile__( - "1: ldq_l %0,%4\n" - " bis $31,%3,%1\n" - " stq_c %1,%2\n" - " beq %1,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - : "=&r" (val), "=&r" (dummy), "=m" (*m) - : "rI" (val), "m" (*m) : "memory"); - - return val; -} - -#define __xchg_local(ptr, x, size) \ -({ \ - unsigned long __xchg__res; \ - volatile void *__xchg__ptr = (ptr); \ - switch (size) { \ - case 1: __xchg__res = __xchg_u8_local(__xchg__ptr, x); break; \ - case 2: __xchg__res = __xchg_u16_local(__xchg__ptr, x); break; \ - case 4: __xchg__res = __xchg_u32_local(__xchg__ptr, x); break; \ - case 8: __xchg__res = __xchg_u64_local(__xchg__ptr, x); break; \ - default: __xchg_called_with_bad_pointer(); __xchg__res = x; \ - } \ - __xchg__res; \ -}) - -#define xchg_local(ptr,x) \ - ({ \ - __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __xchg_local((ptr), (unsigned long)_x_, \ - sizeof(*(ptr))); \ +#define cmpxchg_local(ptr, o, n) \ + ({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, \ + sizeof(*(ptr))); \ }) -/* - * Atomic compare and exchange. Compare OLD with MEM, if identical, - * store NEW in MEM. Return the initial value in MEM. Success is - * indicated by comparing RETURN with OLD. - * - * The memory barrier should be placed in SMP only when we actually - * make the change. If we don't change anything (so if the returned - * prev is equal to old) then we aren't acquiring anything new and - * we don't need any memory barrier as far I can tell. - */ - -#define __HAVE_ARCH_CMPXCHG 1 - -static inline unsigned long -__cmpxchg_u8(volatile char *m, long old, long new) -{ - unsigned long prev, tmp, cmp, addr64; - - __asm__ __volatile__( - " andnot %5,7,%4\n" - " insbl %1,%5,%1\n" - "1: ldq_l %2,0(%4)\n" - " extbl %2,%5,%0\n" - " cmpeq %0,%6,%3\n" - " beq %3,2f\n" - " mskbl %2,%5,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%4)\n" - " beq %2,3f\n" -#ifdef CONFIG_SMP - " mb\n" -#endif - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) - : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); - - return prev; -} - -static inline unsigned long -__cmpxchg_u16(volatile short *m, long old, long new) -{ - unsigned long prev, tmp, cmp, addr64; - - __asm__ __volatile__( - " andnot %5,7,%4\n" - " inswl %1,%5,%1\n" - "1: ldq_l %2,0(%4)\n" - " extwl %2,%5,%0\n" - " cmpeq %0,%6,%3\n" - " beq %3,2f\n" - " mskwl %2,%5,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%4)\n" - " beq %2,3f\n" -#ifdef CONFIG_SMP - " mb\n" -#endif - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) - : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); - - return prev; -} - -static inline unsigned long -__cmpxchg_u32(volatile int *m, int old, int new) -{ - unsigned long prev, cmp; - - __asm__ __volatile__( - "1: ldl_l %0,%5\n" - " cmpeq %0,%3,%1\n" - " beq %1,2f\n" - " mov %4,%1\n" - " stl_c %1,%2\n" - " beq %1,3f\n" -#ifdef CONFIG_SMP - " mb\n" -#endif - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r"(prev), "=&r"(cmp), "=m"(*m) - : "r"((long) old), "r"(new), "m"(*m) : "memory"); - - return prev; -} +#define cmpxchg64_local(ptr, o, n) \ + ({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_local((ptr), (o), (n)); \ + }) -static inline unsigned long -__cmpxchg_u64(volatile long *m, unsigned long old, unsigned long new) -{ - unsigned long prev, cmp; - - __asm__ __volatile__( - "1: ldq_l %0,%5\n" - " cmpeq %0,%3,%1\n" - " beq %1,2f\n" - " mov %4,%1\n" - " stq_c %1,%2\n" - " beq %1,3f\n" #ifdef CONFIG_SMP - " mb\n" +#undef __ASM__MB +#define __ASM__MB "\tmb\n" #endif - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r"(prev), "=&r"(cmp), "=m"(*m) - : "r"((long) old), "r"(new), "m"(*m) : "memory"); - - return prev; -} - -/* This function doesn't exist, so you'll get a linker error - if something tries to do an invalid cmpxchg(). */ -extern void __cmpxchg_called_with_bad_pointer(void); - -static __always_inline unsigned long -__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) -{ - switch (size) { - case 1: - return __cmpxchg_u8(ptr, old, new); - case 2: - return __cmpxchg_u16(ptr, old, new); - case 4: - return __cmpxchg_u32(ptr, old, new); - case 8: - return __cmpxchg_u64(ptr, old, new); - } - __cmpxchg_called_with_bad_pointer(); - return old; -} - -#define cmpxchg(ptr, o, n) \ - ({ \ - __typeof__(*(ptr)) _o_ = (o); \ - __typeof__(*(ptr)) _n_ = (n); \ - (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ - (unsigned long)_n_, sizeof(*(ptr))); \ +#undef ____xchg +#undef ____cmpxchg +#define ____xchg(type, args...) __xchg ##type(args) +#define ____cmpxchg(type, args...) __cmpxchg ##type(args) +#include <asm/xchg.h> + +#define xchg(ptr,x) \ + ({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, \ + sizeof(*(ptr))); \ }) -#define cmpxchg64(ptr, o, n) \ - ({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - cmpxchg((ptr), (o), (n)); \ - }) - -static inline unsigned long -__cmpxchg_u8_local(volatile char *m, long old, long new) -{ - unsigned long prev, tmp, cmp, addr64; - - __asm__ __volatile__( - " andnot %5,7,%4\n" - " insbl %1,%5,%1\n" - "1: ldq_l %2,0(%4)\n" - " extbl %2,%5,%0\n" - " cmpeq %0,%6,%3\n" - " beq %3,2f\n" - " mskbl %2,%5,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%4)\n" - " beq %2,3f\n" - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) - : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); - - return prev; -} - -static inline unsigned long -__cmpxchg_u16_local(volatile short *m, long old, long new) -{ - unsigned long prev, tmp, cmp, addr64; - - __asm__ __volatile__( - " andnot %5,7,%4\n" - " inswl %1,%5,%1\n" - "1: ldq_l %2,0(%4)\n" - " extwl %2,%5,%0\n" - " cmpeq %0,%6,%3\n" - " beq %3,2f\n" - " mskwl %2,%5,%2\n" - " or %1,%2,%2\n" - " stq_c %2,0(%4)\n" - " beq %2,3f\n" - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) - : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); - - return prev; -} - -static inline unsigned long -__cmpxchg_u32_local(volatile int *m, int old, int new) -{ - unsigned long prev, cmp; - - __asm__ __volatile__( - "1: ldl_l %0,%5\n" - " cmpeq %0,%3,%1\n" - " beq %1,2f\n" - " mov %4,%1\n" - " stl_c %1,%2\n" - " beq %1,3f\n" - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r"(prev), "=&r"(cmp), "=m"(*m) - : "r"((long) old), "r"(new), "m"(*m) : "memory"); - - return prev; -} - -static inline unsigned long -__cmpxchg_u64_local(volatile long *m, unsigned long old, unsigned long new) -{ - unsigned long prev, cmp; - - __asm__ __volatile__( - "1: ldq_l %0,%5\n" - " cmpeq %0,%3,%1\n" - " beq %1,2f\n" - " mov %4,%1\n" - " stq_c %1,%2\n" - " beq %1,3f\n" - "2:\n" - ".subsection 2\n" - "3: br 1b\n" - ".previous" - : "=&r"(prev), "=&r"(cmp), "=m"(*m) - : "r"((long) old), "r"(new), "m"(*m) : "memory"); - - return prev; -} - -static __always_inline unsigned long -__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, - int size) -{ - switch (size) { - case 1: - return __cmpxchg_u8_local(ptr, old, new); - case 2: - return __cmpxchg_u16_local(ptr, old, new); - case 4: - return __cmpxchg_u32_local(ptr, old, new); - case 8: - return __cmpxchg_u64_local(ptr, old, new); - } - __cmpxchg_called_with_bad_pointer(); - return old; -} -#define cmpxchg_local(ptr, o, n) \ - ({ \ - __typeof__(*(ptr)) _o_ = (o); \ - __typeof__(*(ptr)) _n_ = (n); \ - (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \ - (unsigned long)_n_, sizeof(*(ptr))); \ +#define cmpxchg(ptr, o, n) \ + ({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr)));\ }) -#define cmpxchg64_local(ptr, o, n) \ - ({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - cmpxchg_local((ptr), (o), (n)); \ + +#define cmpxchg64(ptr, o, n) \ + ({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg((ptr), (o), (n)); \ }) +#undef __ASM__MB +#undef ____cmpxchg + +#define __HAVE_ARCH_CMPXCHG 1 #endif /* __ASSEMBLY__ */ diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h index c1541353ccef..f072f344497e 100644 --- a/arch/alpha/include/asm/types.h +++ b/arch/alpha/include/asm/types.h @@ -8,7 +8,12 @@ * not a major issue. However, for interoperability, libraries still * need to be careful to avoid a name clashes. */ + +#ifdef __KERNEL__ +#include <asm-generic/int-ll64.h> +#else #include <asm-generic/int-l64.h> +#endif #ifndef __ASSEMBLY__ diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h index 22de3b434a22..163f3053001c 100644 --- a/arch/alpha/include/asm/uaccess.h +++ b/arch/alpha/include/asm/uaccess.h @@ -498,13 +498,13 @@ struct exception_table_entry }; /* Returns the new pc */ -#define fixup_exception(map_reg, fixup, pc) \ +#define fixup_exception(map_reg, _fixup, pc) \ ({ \ - if ((fixup)->fixup.bits.valreg != 31) \ - map_reg((fixup)->fixup.bits.valreg) = 0; \ - if ((fixup)->fixup.bits.errreg != 31) \ - map_reg((fixup)->fixup.bits.errreg) = -EFAULT; \ - (pc) + (fixup)->fixup.bits.nextinsn; \ + if ((_fixup)->fixup.bits.valreg != 31) \ + map_reg((_fixup)->fixup.bits.valreg) = 0; \ + if ((_fixup)->fixup.bits.errreg != 31) \ + map_reg((_fixup)->fixup.bits.errreg) = -EFAULT; \ + (pc) + (_fixup)->fixup.bits.nextinsn; \ }) diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h new file mode 100644 index 000000000000..beba1b803e0d --- /dev/null +++ b/arch/alpha/include/asm/xchg.h @@ -0,0 +1,258 @@ +#ifndef __ALPHA_SYSTEM_H +#error Do not include xchg.h directly! +#else +/* + * xchg/xchg_local and cmpxchg/cmpxchg_local share the same code + * except that local version do not have the expensive memory barrier. + * So this file is included twice from asm/system.h. + */ + +/* + * Atomic exchange. + * Since it can be used to implement critical sections + * it must clobber "memory" (also for interrupts in UP). + */ + +static inline unsigned long +____xchg(_u8, volatile char *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( + " andnot %4,7,%3\n" + " insbl %1,%4,%1\n" + "1: ldq_l %2,0(%3)\n" + " extbl %2,%4,%0\n" + " mskbl %2,%4,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%3)\n" + " beq %2,2f\n" + __ASM__MB + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +____xchg(_u16, volatile short *m, unsigned long val) +{ + unsigned long ret, tmp, addr64; + + __asm__ __volatile__( + " andnot %4,7,%3\n" + " inswl %1,%4,%1\n" + "1: ldq_l %2,0(%3)\n" + " extwl %2,%4,%0\n" + " mskwl %2,%4,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%3)\n" + " beq %2,2f\n" + __ASM__MB + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) + : "r" ((long)m), "1" (val) : "memory"); + + return ret; +} + +static inline unsigned long +____xchg(_u32, volatile int *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + "1: ldl_l %0,%4\n" + " bis $31,%3,%1\n" + " stl_c %1,%2\n" + " beq %1,2f\n" + __ASM__MB + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +static inline unsigned long +____xchg(_u64, volatile long *m, unsigned long val) +{ + unsigned long dummy; + + __asm__ __volatile__( + "1: ldq_l %0,%4\n" + " bis $31,%3,%1\n" + " stq_c %1,%2\n" + " beq %1,2f\n" + __ASM__MB + ".subsection 2\n" + "2: br 1b\n" + ".previous" + : "=&r" (val), "=&r" (dummy), "=m" (*m) + : "rI" (val), "m" (*m) : "memory"); + + return val; +} + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid xchg(). */ +extern void __xchg_called_with_bad_pointer(void); + +static __always_inline unsigned long +____xchg(, volatile void *ptr, unsigned long x, int size) +{ + switch (size) { + case 1: + return ____xchg(_u8, ptr, x); + case 2: + return ____xchg(_u16, ptr, x); + case 4: + return ____xchg(_u32, ptr, x); + case 8: + return ____xchg(_u64, ptr, x); + } + __xchg_called_with_bad_pointer(); + return x; +} + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + * + * The memory barrier should be placed in SMP only when we actually + * make the change. If we don't change anything (so if the returned + * prev is equal to old) then we aren't acquiring anything new and + * we don't need any memory barrier as far I can tell. + */ + +static inline unsigned long +____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( + " andnot %5,7,%4\n" + " insbl %1,%5,%1\n" + "1: ldq_l %2,0(%4)\n" + " extbl %2,%5,%0\n" + " cmpeq %0,%6,%3\n" + " beq %3,2f\n" + " mskbl %2,%5,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%4)\n" + " beq %2,3f\n" + __ASM__MB + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new) +{ + unsigned long prev, tmp, cmp, addr64; + + __asm__ __volatile__( + " andnot %5,7,%4\n" + " inswl %1,%5,%1\n" + "1: ldq_l %2,0(%4)\n" + " extwl %2,%5,%0\n" + " cmpeq %0,%6,%3\n" + " beq %3,2f\n" + " mskwl %2,%5,%2\n" + " or %1,%2,%2\n" + " stq_c %2,0(%4)\n" + " beq %2,3f\n" + __ASM__MB + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) + : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg(_u32, volatile int *m, int old, int new) +{ + unsigned long prev, cmp; + + __asm__ __volatile__( + "1: ldl_l %0,%5\n" + " cmpeq %0,%3,%1\n" + " beq %1,2f\n" + " mov %4,%1\n" + " stl_c %1,%2\n" + " beq %1,3f\n" + __ASM__MB + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +static inline unsigned long +____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new) +{ + unsigned long prev, cmp; + + __asm__ __volatile__( + "1: ldq_l %0,%5\n" + " cmpeq %0,%3,%1\n" + " beq %1,2f\n" + " mov %4,%1\n" + " stq_c %1,%2\n" + " beq %1,3f\n" + __ASM__MB + "2:\n" + ".subsection 2\n" + "3: br 1b\n" + ".previous" + : "=&r"(prev), "=&r"(cmp), "=m"(*m) + : "r"((long) old), "r"(new), "m"(*m) : "memory"); + + return prev; +} + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid cmpxchg(). */ +extern void __cmpxchg_called_with_bad_pointer(void); + +static __always_inline unsigned long +____cmpxchg(, volatile void *ptr, unsigned long old, unsigned long new, + int size) +{ + switch (size) { + case 1: + return ____cmpxchg(_u8, ptr, old, new); + case 2: + return ____cmpxchg(_u16, ptr, old, new); + case 4: + return ____cmpxchg(_u32, ptr, old, new); + case 8: + return ____cmpxchg(_u64, ptr, old, new); + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#endif diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index b4697759a123..a427538252f8 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -12,7 +12,7 @@ obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \ obj-$(CONFIG_VGA_HOSE) += console.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_PCI) += pci.o pci_iommu.o +obj-$(CONFIG_PCI) += pci.o pci_iommu.o pci-sysfs.o obj-$(CONFIG_SRM_ENV) += srm_env.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/alpha/kernel/err_ev6.c b/arch/alpha/kernel/err_ev6.c index 11aee012a8ae..985e5c1681ac 100644 --- a/arch/alpha/kernel/err_ev6.c +++ b/arch/alpha/kernel/err_ev6.c @@ -157,8 +157,8 @@ ev6_parse_cbox(u64 c_addr, u64 c1_syn, u64 c2_syn, err_print_prefix, streamname[stream], bitsname[bits], sourcename[source]); - printk("%s Address: 0x%016lx\n" - " Syndrome[upper.lower]: %02lx.%02lx\n", + printk("%s Address: 0x%016llx\n" + " Syndrome[upper.lower]: %02llx.%02llx\n", err_print_prefix, c_addr, c2_syn, c1_syn); diff --git a/arch/alpha/kernel/err_ev7.c b/arch/alpha/kernel/err_ev7.c index 68cd493f54c5..73770c6ca013 100644 --- a/arch/alpha/kernel/err_ev7.c +++ b/arch/alpha/kernel/err_ev7.c @@ -246,13 +246,13 @@ ev7_process_pal_subpacket(struct el_subpacket *header) switch(header->type) { case EL_TYPE__PAL__LOGOUT_FRAME: - printk("%s*** MCHK occurred on LPID %ld (RBOX %lx)\n", + printk("%s*** MCHK occurred on LPID %ld (RBOX %llx)\n", err_print_prefix, packet->by_type.logout.whami, packet->by_type.logout.rbox_whami); el_print_timestamp(&packet->by_type.logout.timestamp); - printk("%s EXC_ADDR: %016lx\n" - " HALT_CODE: %lx\n", + printk("%s EXC_ADDR: %016llx\n" + " HALT_CODE: %llx\n", err_print_prefix, packet->by_type.logout.exc_addr, packet->by_type.logout.halt_code); diff --git a/arch/alpha/kernel/err_marvel.c b/arch/alpha/kernel/err_marvel.c index 413bf37eb094..6bfd243efba3 100644 --- a/arch/alpha/kernel/err_marvel.c +++ b/arch/alpha/kernel/err_marvel.c @@ -129,7 +129,7 @@ marvel_print_po7_crrct_sym(u64 crrct_sym) printk("%s Correctable Error Symptoms:\n" - "%s Syndrome: 0x%lx\n", + "%s Syndrome: 0x%llx\n", err_print_prefix, err_print_prefix, EXTRACT(crrct_sym, IO7__PO7_CRRCT_SYM__SYN)); marvel_print_err_cyc(EXTRACT(crrct_sym, IO7__PO7_CRRCT_SYM__ERR_CYC)); @@ -186,7 +186,7 @@ marvel_print_po7_uncrr_sym(u64 uncrr_sym, u64 valid_mask) uncrr_sym &= valid_mask; if (EXTRACT(valid_mask, IO7__PO7_UNCRR_SYM__SYN)) - printk("%s Syndrome: 0x%lx\n", + printk("%s Syndrome: 0x%llx\n", err_print_prefix, EXTRACT(uncrr_sym, IO7__PO7_UNCRR_SYM__SYN)); @@ -307,7 +307,7 @@ marvel_print_po7_ugbge_sym(u64 ugbge_sym) sprintf(opcode_str, "BlkIO"); break; default: - sprintf(opcode_str, "0x%lx\n", + sprintf(opcode_str, "0x%llx\n", EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_OPCODE)); break; } @@ -321,7 +321,7 @@ marvel_print_po7_ugbge_sym(u64 ugbge_sym) opcode_str); if (0xC5 != EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_OPCODE)) - printk("%s Packet Offset 0x%08lx\n", + printk("%s Packet Offset 0x%08llx\n", err_print_prefix, EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_PKT_OFF)); } @@ -480,8 +480,8 @@ marvel_print_po7_err_sum(struct ev7_pal_io_subpacket *io) printk("%s Lost Error\n", err_print_prefix); printk("%s Failing Packet:\n" - "%s Cycle 1: %016lx\n" - "%s Cycle 2: %016lx\n", + "%s Cycle 1: %016llx\n" + "%s Cycle 2: %016llx\n", err_print_prefix, err_print_prefix, io->po7_err_pkt0, err_print_prefix, io->po7_err_pkt1); @@ -515,9 +515,9 @@ marvel_print_pox_tlb_err(u64 tlb_err) if (!(tlb_err & IO7__POX_TLBERR__ERR_VALID)) return; - printk("%s TLB Error on index 0x%lx:\n" + printk("%s TLB Error on index 0x%llx:\n" "%s - %s\n" - "%s - Addr: 0x%016lx\n", + "%s - Addr: 0x%016llx\n", err_print_prefix, EXTRACT(tlb_err, IO7__POX_TLBERR__ERR_TLB_PTR), err_print_prefix, @@ -579,7 +579,7 @@ marvel_print_pox_spl_cmplt(u64 spl_cmplt) sprintf(message, "Uncorrectable Split Write Data Error"); break; default: - sprintf(message, "%08lx\n", + sprintf(message, "%08llx\n", EXTRACT(spl_cmplt, IO7__POX_SPLCMPLT__MESSAGE)); break; } @@ -620,9 +620,9 @@ marvel_print_pox_trans_sum(u64 trans_sum) return; printk("%s Transaction Summary:\n" - "%s Command: 0x%lx - %s\n" - "%s Address: 0x%016lx%s\n" - "%s PCI-X Master Slot: 0x%lx\n", + "%s Command: 0x%llx - %s\n" + "%s Address: 0x%016llx%s\n" + "%s PCI-X Master Slot: 0x%llx\n", err_print_prefix, err_print_prefix, EXTRACT(trans_sum, IO7__POX_TRANSUM__PCIX_CMD), @@ -964,12 +964,12 @@ marvel_process_io_error(struct ev7_lf_subpackets *lf_subpackets, int print) #if 0 printk("%s PORT 7 ERROR:\n" - "%s PO7_ERROR_SUM: %016lx\n" - "%s PO7_UNCRR_SYM: %016lx\n" - "%s PO7_CRRCT_SYM: %016lx\n" - "%s PO7_UGBGE_SYM: %016lx\n" - "%s PO7_ERR_PKT0: %016lx\n" - "%s PO7_ERR_PKT1: %016lx\n", + "%s PO7_ERROR_SUM: %016llx\n" + "%s PO7_UNCRR_SYM: %016llx\n" + "%s PO7_CRRCT_SYM: %016llx\n" + "%s PO7_UGBGE_SYM: %016llx\n" + "%s PO7_ERR_PKT0: %016llx\n" + "%s PO7_ERR_PKT1: %016llx\n", err_print_prefix, err_print_prefix, io->po7_error_sum, err_print_prefix, io->po7_uncrr_sym, @@ -987,12 +987,12 @@ marvel_process_io_error(struct ev7_lf_subpackets *lf_subpackets, int print) if (!MARVEL_IO_ERR_VALID(io->ports[i].pox_err_sum)) continue; - printk("%s PID %u PORT %d POx_ERR_SUM: %016lx\n", + printk("%s PID %u PORT %d POx_ERR_SUM: %016llx\n", err_print_prefix, lf_subpackets->io_pid, i, io->ports[i].pox_err_sum); marvel_print_pox_err(io->ports[i].pox_err_sum, &io->ports[i]); - printk("%s [ POx_FIRST_ERR: %016lx ]\n", + printk("%s [ POx_FIRST_ERR: %016llx ]\n", err_print_prefix, io->ports[i].pox_first_err); marvel_print_pox_err(io->ports[i].pox_first_err, &io->ports[i]); diff --git a/arch/alpha/kernel/err_titan.c b/arch/alpha/kernel/err_titan.c index 257449ed15ef..c7e28a88d6e3 100644 --- a/arch/alpha/kernel/err_titan.c +++ b/arch/alpha/kernel/err_titan.c @@ -107,12 +107,12 @@ titan_parse_p_serror(int which, u64 serror, int print) if (!print) return status; - printk("%s PChip %d SERROR: %016lx\n", + printk("%s PChip %d SERROR: %016llx\n", err_print_prefix, which, serror); if (serror & TITAN__PCHIP_SERROR__ECCMASK) { printk("%s %sorrectable ECC Error:\n" " Source: %-6s Command: %-8s Syndrome: 0x%08x\n" - " Address: 0x%lx\n", + " Address: 0x%llx\n", err_print_prefix, (serror & TITAN__PCHIP_SERROR__UECC) ? "Unc" : "C", serror_src[EXTRACT(serror, TITAN__PCHIP_SERROR__SRC)], @@ -223,7 +223,7 @@ titan_parse_p_perror(int which, int port, u64 perror, int print) if (!print) return status; - printk("%s PChip %d %cPERROR: %016lx\n", + printk("%s PChip %d %cPERROR: %016llx\n", err_print_prefix, which, port ? 'A' : 'G', perror); if (perror & TITAN__PCHIP_PERROR__IPTPW) @@ -316,7 +316,7 @@ titan_parse_p_agperror(int which, u64 agperror, int print) addr = EXTRACT(agperror, TITAN__PCHIP_AGPERROR__ADDR) << 3; len = EXTRACT(agperror, TITAN__PCHIP_AGPERROR__LEN); - printk("%s PChip %d AGPERROR: %016lx\n", err_print_prefix, + printk("%s PChip %d AGPERROR: %016llx\n", err_print_prefix, which, agperror); if (agperror & TITAN__PCHIP_AGPERROR__NOWINDOW) printk("%s No Window\n", err_print_prefix); @@ -597,16 +597,16 @@ privateer_process_680_frame(struct el_common *mchk_header, int print) return status; /* TODO - decode instead of just dumping... */ - printk("%s Summary Flags: %016lx\n" - " CChip DIRx: %016lx\n" - " System Management IR: %016lx\n" - " CPU IR: %016lx\n" - " Power Supply IR: %016lx\n" - " LM78 Fault Status: %016lx\n" - " System Doors: %016lx\n" - " Temperature Warning: %016lx\n" - " Fan Control: %016lx\n" - " Fatal Power Down Code: %016lx\n", + printk("%s Summary Flags: %016llx\n" + " CChip DIRx: %016llx\n" + " System Management IR: %016llx\n" + " CPU IR: %016llx\n" + " Power Supply IR: %016llx\n" + " LM78 Fault Status: %016llx\n" + " System Doors: %016llx\n" + " Temperature Warning: %016llx\n" + " Fan Control: %016llx\n" + " Fatal Power Down Code: %016llx\n", err_print_prefix, emchk->summary, emchk->c_dirx, diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c new file mode 100644 index 000000000000..6ea822e7f724 --- /dev/null +++ b/arch/alpha/kernel/pci-sysfs.c @@ -0,0 +1,366 @@ +/* + * arch/alpha/kernel/pci-sysfs.c + * + * Copyright (C) 2009 Ivan Kokshaysky + * + * Alpha PCI resource files. + * + * Loosely based on generic HAVE_PCI_MMAP implementation in + * drivers/pci/pci-sysfs.c + */ + +#include <linux/sched.h> +#include <linux/pci.h> + +static int hose_mmap_page_range(struct pci_controller *hose, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_type, int sparse) +{ + unsigned long base; + + if (mmap_type == pci_mmap_mem) + base = sparse ? hose->sparse_mem_base : hose->dense_mem_base; + else + base = sparse ? hose->sparse_io_base : hose->dense_io_base; + + vma->vm_pgoff += base >> PAGE_SHIFT; + vma->vm_flags |= (VM_IO | VM_RESERVED); + + return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +static int __pci_mmap_fits(struct pci_dev *pdev, int num, + struct vm_area_struct *vma, int sparse) +{ + unsigned long nr, start, size; + int shift = sparse ? 5 : 0; + + nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + start = vma->vm_pgoff; + size = ((pci_resource_len(pdev, num) - 1) >> (PAGE_SHIFT - shift)) + 1; + + if (start < size && size - start >= nr) + return 1; + WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on %s BAR %d " + "(size 0x%08lx)\n", + current->comm, sparse ? " sparse" : "", start, start + nr, + pci_name(pdev), num, size); + return 0; +} + +/** + * pci_mmap_resource - map a PCI resource into user memory space + * @kobj: kobject for mapping + * @attr: struct bin_attribute for the file being mapped + * @vma: struct vm_area_struct passed into the mmap + * @sparse: address space type + * + * Use the bus mapping routines to map a PCI resource into userspace. + */ +static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, + struct vm_area_struct *vma, int sparse) +{ + struct pci_dev *pdev = to_pci_dev(container_of(kobj, + struct device, kobj)); + struct resource *res = (struct resource *)attr->private; + enum pci_mmap_state mmap_type; + struct pci_bus_region bar; + int i; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) + if (res == &pdev->resource[i]) + break; + if (i >= PCI_ROM_RESOURCE) + return -ENODEV; + + if (!__pci_mmap_fits(pdev, i, vma, sparse)) + return -EINVAL; + + if (iomem_is_exclusive(res->start)) + return -EINVAL; + + pcibios_resource_to_bus(pdev, &bar, res); + vma->vm_pgoff += bar.start >> (PAGE_SHIFT - (sparse ? 5 : 0)); + mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io; + + return hose_mmap_page_range(pdev->sysdata, vma, mmap_type, sparse); +} + +static int pci_mmap_resource_sparse(struct kobject *kobj, + struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 1); +} + +static int pci_mmap_resource_dense(struct kobject *kobj, + struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 0); +} + +/** + * pci_remove_resource_files - cleanup resource files + * @dev: dev to cleanup + * + * If we created resource files for @dev, remove them from sysfs and + * free their resources. + */ +void pci_remove_resource_files(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + struct bin_attribute *res_attr; + + res_attr = pdev->res_attr[i]; + if (res_attr) { + sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); + kfree(res_attr); + } + + res_attr = pdev->res_attr_wc[i]; + if (res_attr) { + sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); + kfree(res_attr); + } + } +} + +static int sparse_mem_mmap_fits(struct pci_dev *pdev, int num) +{ + struct pci_bus_region bar; + struct pci_controller *hose = pdev->sysdata; + long dense_offset; + unsigned long sparse_size; + + pcibios_resource_to_bus(pdev, &bar, &pdev->resource[num]); + + /* All core logic chips have 4G sparse address space, except + CIA which has 16G (see xxx_SPARSE_MEM and xxx_DENSE_MEM + definitions in asm/core_xxx.h files). This corresponds + to 128M or 512M of the bus space. */ + dense_offset = (long)(hose->dense_mem_base - hose->sparse_mem_base); + sparse_size = dense_offset >= 0x400000000UL ? 0x20000000 : 0x8000000; + + return bar.end < sparse_size; +} + +static int pci_create_one_attr(struct pci_dev *pdev, int num, char *name, + char *suffix, struct bin_attribute *res_attr, + unsigned long sparse) +{ + size_t size = pci_resource_len(pdev, num); + + sprintf(name, "resource%d%s", num, suffix); + res_attr->mmap = sparse ? pci_mmap_resource_sparse : + pci_mmap_resource_dense; + res_attr->attr.name = name; + res_attr->attr.mode = S_IRUSR | S_IWUSR; + res_attr->size = sparse ? size << 5 : size; + res_attr->private = &pdev->resource[num]; + return sysfs_create_bin_file(&pdev->dev.kobj, res_attr); +} + +static int pci_create_attr(struct pci_dev *pdev, int num) +{ + /* allocate attribute structure, piggyback attribute name */ + int retval, nlen1, nlen2 = 0, res_count = 1; + unsigned long sparse_base, dense_base; + struct bin_attribute *attr; + struct pci_controller *hose = pdev->sysdata; + char *suffix, *attr_name; + + suffix = ""; /* Assume bwx machine, normal resourceN files. */ + nlen1 = 10; + + if (pdev->resource[num].flags & IORESOURCE_MEM) { + sparse_base = hose->sparse_mem_base; + dense_base = hose->dense_mem_base; + if (sparse_base && !sparse_mem_mmap_fits(pdev, num)) { + sparse_base = 0; + suffix = "_dense"; + nlen1 = 16; /* resourceN_dense */ + } + } else { + sparse_base = hose->sparse_io_base; + dense_base = hose->dense_io_base; + } + + if (sparse_base) { + suffix = "_sparse"; + nlen1 = 17; + if (dense_base) { + nlen2 = 16; /* resourceN_dense */ + res_count = 2; + } + } + + attr = kzalloc(sizeof(*attr) * res_count + nlen1 + nlen2, GFP_ATOMIC); + if (!attr) + return -ENOMEM; + + /* Create bwx, sparse or single dense file */ + attr_name = (char *)(attr + res_count); + pdev->res_attr[num] = attr; + retval = pci_create_one_attr(pdev, num, attr_name, suffix, attr, + sparse_base); + if (retval || res_count == 1) + return retval; + + /* Create dense file */ + attr_name += nlen1; + attr++; + pdev->res_attr_wc[num] = attr; + return pci_create_one_attr(pdev, num, attr_name, "_dense", attr, 0); +} + +/** + * pci_create_resource_files - create resource files in sysfs for @dev + * @dev: dev in question + * + * Walk the resources in @dev creating files for each resource available. + */ +int pci_create_resource_files(struct pci_dev *pdev) +{ + int i; + int retval; + + /* Expose the PCI resources from this device as files */ + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + + /* skip empty resources */ + if (!pci_resource_len(pdev, i)) + continue; + + retval = pci_create_attr(pdev, i); + if (retval) { + pci_remove_resource_files(pdev); + return retval; + } + } + return 0; +} + +/* Legacy I/O bus mapping stuff. */ + +static int __legacy_mmap_fits(struct pci_controller *hose, + struct vm_area_struct *vma, + unsigned long res_size, int sparse) +{ + unsigned long nr, start, size; + + nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + start = vma->vm_pgoff; + size = ((res_size - 1) >> PAGE_SHIFT) + 1; + + if (start < size && size - start >= nr) + return 1; + WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on hose %d " + "(size 0x%08lx)\n", + current->comm, sparse ? " sparse" : "", start, start + nr, + hose->index, size); + return 0; +} + +static inline int has_sparse(struct pci_controller *hose, + enum pci_mmap_state mmap_type) +{ + unsigned long base; + + base = (mmap_type == pci_mmap_mem) ? hose->sparse_mem_base : + hose->sparse_io_base; + + return base != 0; +} + +int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma, + enum pci_mmap_state mmap_type) +{ + struct pci_controller *hose = bus->sysdata; + int sparse = has_sparse(hose, mmap_type); + unsigned long res_size; + + res_size = (mmap_type == pci_mmap_mem) ? bus->legacy_mem->size : + bus->legacy_io->size; + if (!__legacy_mmap_fits(hose, vma, res_size, sparse)) + return -EINVAL; + + return hose_mmap_page_range(hose, vma, mmap_type, sparse); +} + +/** + * pci_adjust_legacy_attr - adjustment of legacy file attributes + * @b: bus to create files under + * @mmap_type: I/O port or memory + * + * Adjust file name and size for sparse mappings. + */ +void pci_adjust_legacy_attr(struct pci_bus *bus, enum pci_mmap_state mmap_type) +{ + struct pci_controller *hose = bus->sysdata; + + if (!has_sparse(hose, mmap_type)) + return; + + if (mmap_type == pci_mmap_mem) { + bus->legacy_mem->attr.name = "legacy_mem_sparse"; + bus->legacy_mem->size <<= 5; + } else { + bus->legacy_io->attr.name = "legacy_io_sparse"; + bus->legacy_io->size <<= 5; + } + return; +} + +/* Legacy I/O bus read/write functions */ +int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size) +{ + struct pci_controller *hose = bus->sysdata; + + port += hose->io_space->start; + + switch(size) { + case 1: + *((u8 *)val) = inb(port); + return 1; + case 2: + if (port & 1) + return -EINVAL; + *((u16 *)val) = inw(port); + return 2; + case 4: + if (port & 3) + return -EINVAL; + *((u32 *)val) = inl(port); + return 4; + } + return -EINVAL; +} + +int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size) +{ + struct pci_controller *hose = bus->sysdata; + + port += hose->io_space->start; + + switch(size) { + case 1: + outb(port, val); + return 1; + case 2: + if (port & 1) + return -EINVAL; + outw(port, val); + return 2; + case 4: + if (port & 3) + return -EINVAL; + outl(port, val); + return 4; + } + return -EINVAL; +} diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index a3b938811400..a91ba28999b5 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -168,7 +168,7 @@ pcibios_align_resource(void *data, struct resource *res, */ /* Align to multiple of size of minimum base. */ - alignto = max(0x1000UL, align); + alignto = max_t(resource_size_t, 0x1000, align); start = ALIGN(start, alignto); if (hose->sparse_mem_base && size <= 7 * 16*MB) { if (((start / (16*MB)) & 0x7) == 0) { diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index b9094da05d7a..bfb880af959d 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -247,7 +247,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size, && paddr + size <= __direct_map_size) { ret = paddr + __direct_map_base; - DBGA2("pci_map_single: [%p,%lx] -> direct %lx from %p\n", + DBGA2("pci_map_single: [%p,%zx] -> direct %llx from %p\n", cpu_addr, size, ret, __builtin_return_address(0)); return ret; @@ -258,7 +258,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size, if (dac_allowed) { ret = paddr + alpha_mv.pci_dac_offset; - DBGA2("pci_map_single: [%p,%lx] -> DAC %lx from %p\n", + DBGA2("pci_map_single: [%p,%zx] -> DAC %llx from %p\n", cpu_addr, size, ret, __builtin_return_address(0)); return ret; @@ -299,7 +299,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size, ret = arena->dma_base + dma_ofs * PAGE_SIZE; ret += (unsigned long)cpu_addr & ~PAGE_MASK; - DBGA2("pci_map_single: [%p,%lx] np %ld -> sg %lx from %p\n", + DBGA2("pci_map_single: [%p,%zx] np %ld -> sg %llx from %p\n", cpu_addr, size, npages, ret, __builtin_return_address(0)); return ret; @@ -355,14 +355,14 @@ pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, size_t size, && dma_addr < __direct_map_base + __direct_map_size) { /* Nothing to do. */ - DBGA2("pci_unmap_single: direct [%lx,%lx] from %p\n", + DBGA2("pci_unmap_single: direct [%llx,%zx] from %p\n", dma_addr, size, __builtin_return_address(0)); return; } if (dma_addr > 0xffffffff) { - DBGA2("pci64_unmap_single: DAC [%lx,%lx] from %p\n", + DBGA2("pci64_unmap_single: DAC [%llx,%zx] from %p\n", dma_addr, size, __builtin_return_address(0)); return; } @@ -373,9 +373,9 @@ pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, size_t size, dma_ofs = (dma_addr - arena->dma_base) >> PAGE_SHIFT; if (dma_ofs * PAGE_SIZE >= arena->size) { - printk(KERN_ERR "Bogus pci_unmap_single: dma_addr %lx " - " base %lx size %x\n", dma_addr, arena->dma_base, - arena->size); + printk(KERN_ERR "Bogus pci_unmap_single: dma_addr %llx " + " base %llx size %x\n", + dma_addr, arena->dma_base, arena->size); return; BUG(); } @@ -394,7 +394,7 @@ pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, size_t size, spin_unlock_irqrestore(&arena->lock, flags); - DBGA2("pci_unmap_single: sg [%lx,%lx] np %ld from %p\n", + DBGA2("pci_unmap_single: sg [%llx,%zx] np %ld from %p\n", dma_addr, size, npages, __builtin_return_address(0)); } EXPORT_SYMBOL(pci_unmap_single); @@ -444,7 +444,7 @@ try_again: goto try_again; } - DBGA2("pci_alloc_consistent: %lx -> [%p,%x] from %p\n", + DBGA2("pci_alloc_consistent: %zx -> [%p,%llx] from %p\n", size, cpu_addr, *dma_addrp, __builtin_return_address(0)); return cpu_addr; @@ -464,7 +464,7 @@ pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu_addr, pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); free_pages((unsigned long)cpu_addr, get_order(size)); - DBGA2("pci_free_consistent: [%x,%lx] from %p\n", + DBGA2("pci_free_consistent: [%llx,%zx] from %p\n", dma_addr, size, __builtin_return_address(0)); } EXPORT_SYMBOL(pci_free_consistent); @@ -551,7 +551,7 @@ sg_fill(struct device *dev, struct scatterlist *leader, struct scatterlist *end, out->dma_address = paddr + __direct_map_base; out->dma_length = size; - DBGA(" sg_fill: [%p,%lx] -> direct %lx\n", + DBGA(" sg_fill: [%p,%lx] -> direct %llx\n", __va(paddr), size, out->dma_address); return 0; @@ -563,7 +563,7 @@ sg_fill(struct device *dev, struct scatterlist *leader, struct scatterlist *end, out->dma_address = paddr + alpha_mv.pci_dac_offset; out->dma_length = size; - DBGA(" sg_fill: [%p,%lx] -> DAC %lx\n", + DBGA(" sg_fill: [%p,%lx] -> DAC %llx\n", __va(paddr), size, out->dma_address); return 0; @@ -589,7 +589,7 @@ sg_fill(struct device *dev, struct scatterlist *leader, struct scatterlist *end, out->dma_address = arena->dma_base + dma_ofs*PAGE_SIZE + paddr; out->dma_length = size; - DBGA(" sg_fill: [%p,%lx] -> sg %lx np %ld\n", + DBGA(" sg_fill: [%p,%lx] -> sg %llx np %ld\n", __va(paddr), size, out->dma_address, npages); /* All virtually contiguous. We need to find the length of each @@ -752,7 +752,7 @@ pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, if (addr > 0xffffffff) { /* It's a DAC address -- nothing to do. */ - DBGA(" (%ld) DAC [%lx,%lx]\n", + DBGA(" (%ld) DAC [%llx,%zx]\n", sg - end + nents, addr, size); continue; } @@ -760,12 +760,12 @@ pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, if (addr >= __direct_map_base && addr < __direct_map_base + __direct_map_size) { /* Nothing to do. */ - DBGA(" (%ld) direct [%lx,%lx]\n", + DBGA(" (%ld) direct [%llx,%zx]\n", sg - end + nents, addr, size); continue; } - DBGA(" (%ld) sg [%lx,%lx]\n", + DBGA(" (%ld) sg [%llx,%zx]\n", sg - end + nents, addr, size); npages = iommu_num_pages(addr, size, PAGE_SIZE); diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h index fe14c6747cd6..567f2598d090 100644 --- a/arch/alpha/kernel/proto.h +++ b/arch/alpha/kernel/proto.h @@ -20,7 +20,7 @@ struct pci_controller; extern struct pci_ops apecs_pci_ops; extern void apecs_init_arch(void); extern void apecs_pci_clr_err(void); -extern void apecs_machine_check(u64, u64); +extern void apecs_machine_check(unsigned long vector, unsigned long la_ptr); extern void apecs_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); /* core_cia.c */ @@ -29,7 +29,7 @@ extern void cia_init_pci(void); extern void cia_init_arch(void); extern void pyxis_init_arch(void); extern void cia_kill_arch(int); -extern void cia_machine_check(u64, u64); +extern void cia_machine_check(unsigned long vector, unsigned long la_ptr); extern void cia_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); /* core_irongate.c */ @@ -42,7 +42,7 @@ extern void irongate_machine_check(u64, u64); /* core_lca.c */ extern struct pci_ops lca_pci_ops; extern void lca_init_arch(void); -extern void lca_machine_check(u64, u64); +extern void lca_machine_check(unsigned long vector, unsigned long la_ptr); extern void lca_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); /* core_marvel.c */ @@ -64,7 +64,7 @@ void io7_clear_errors(struct io7 *io7); extern struct pci_ops mcpcia_pci_ops; extern void mcpcia_init_arch(void); extern void mcpcia_init_hoses(void); -extern void mcpcia_machine_check(u64, u64); +extern void mcpcia_machine_check(unsigned long vector, unsigned long la_ptr); extern void mcpcia_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); /* core_polaris.c */ @@ -72,14 +72,14 @@ extern struct pci_ops polaris_pci_ops; extern int polaris_read_config_dword(struct pci_dev *, int, u32 *); extern int polaris_write_config_dword(struct pci_dev *, int, u32); extern void polaris_init_arch(void); -extern void polaris_machine_check(u64, u64); +extern void polaris_machine_check(unsigned long vector, unsigned long la_ptr); #define polaris_pci_tbi ((void *)0) /* core_t2.c */ extern struct pci_ops t2_pci_ops; extern void t2_init_arch(void); extern void t2_kill_arch(int); -extern void t2_machine_check(u64, u64); +extern void t2_machine_check(unsigned long vector, unsigned long la_ptr); extern void t2_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); /* core_titan.c */ @@ -94,14 +94,14 @@ extern struct _alpha_agp_info *titan_agp_info(void); extern struct pci_ops tsunami_pci_ops; extern void tsunami_init_arch(void); extern void tsunami_kill_arch(int); -extern void tsunami_machine_check(u64, u64); +extern void tsunami_machine_check(unsigned long vector, unsigned long la_ptr); extern void tsunami_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); /* core_wildfire.c */ extern struct pci_ops wildfire_pci_ops; extern void wildfire_init_arch(void); extern void wildfire_kill_arch(int); -extern void wildfire_machine_check(u64, u64); +extern void wildfire_machine_check(unsigned long vector, unsigned long la_ptr); extern void wildfire_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t); extern int wildfire_pa_to_nid(unsigned long); extern int wildfire_cpuid_to_nid(int); diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 02bee6983ce2..80df86cd746b 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -1255,7 +1255,7 @@ show_cpuinfo(struct seq_file *f, void *slot) platform_string(), nr_processors); #ifdef CONFIG_SMP - seq_printf(f, "cpus active\t\t: %d\n" + seq_printf(f, "cpus active\t\t: %u\n" "cpu active mask\t\t: %016lx\n", num_online_cpus(), cpus_addr(cpu_possible_map)[0]); #endif diff --git a/arch/alpha/kernel/smc37c669.c b/arch/alpha/kernel/smc37c669.c index fd467b207f0f..bca5bda90cde 100644 --- a/arch/alpha/kernel/smc37c669.c +++ b/arch/alpha/kernel/smc37c669.c @@ -2542,8 +2542,8 @@ void __init SMC669_Init ( int index ) SMC37c669_display_device_info( ); #endif local_irq_restore(flags); - printk( "SMC37c669 Super I/O Controller found @ 0x%lx\n", - (unsigned long) SMC_base ); + printk( "SMC37c669 Super I/O Controller found @ 0x%p\n", + SMC_base ); } else { local_irq_restore(flags); diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index e2516f9a8967..2b5caf3d9b15 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -244,12 +244,11 @@ jensen_init_arch(void) } static void -jensen_machine_check (u64 vector, u64 la) +jensen_machine_check(unsigned long vector, unsigned long la) { printk(KERN_CRIT "Machine check\n"); } - /* * The System Vector */ diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c index d232e42be018..9e263256a42d 100644 --- a/arch/alpha/kernel/sys_sable.c +++ b/arch/alpha/kernel/sys_sable.c @@ -453,7 +453,7 @@ sable_lynx_enable_irq(unsigned int irq) sable_lynx_irq_swizzle->update_irq_hw(bit, mask); spin_unlock(&sable_lynx_irq_lock); #if 0 - printk("%s: mask 0x%lx bit 0x%x irq 0x%x\n", + printk("%s: mask 0x%lx bit 0x%lx irq 0x%x\n", __func__, mask, bit, irq); #endif } @@ -469,7 +469,7 @@ sable_lynx_disable_irq(unsigned int irq) sable_lynx_irq_swizzle->update_irq_hw(bit, mask); spin_unlock(&sable_lynx_irq_lock); #if 0 - printk("%s: mask 0x%lx bit 0x%x irq 0x%x\n", + printk("%s: mask 0x%lx bit 0x%lx irq 0x%x\n", __func__, mask, bit, irq); #endif } diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index cefc5a355ef9..6ee7655b7568 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -623,7 +623,7 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg, } lock_kernel(); - printk("Bad unaligned kernel access at %016lx: %p %lx %ld\n", + printk("Bad unaligned kernel access at %016lx: %p %lx %lu\n", pc, va, opcode, reg); do_exit(SIGSEGV); diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index ce4e4296b954..62d4abbaa654 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -250,21 +250,3 @@ asmlinkage void do_bus_error(unsigned long addr, int write_access, dump_dtlb(); die("Bus Error", regs, SIGKILL); } - -/* - * This functionality is currently not possible to implement because - * we're using segmentation to ensure a fixed mapping of the kernel - * virtual address space. - * - * It would be possible to implement this, but it would require us to - * disable segmentation at startup and load the kernel mappings into - * the TLB like any other pages. There will be lots of trickery to - * avoid recursive invocation of the TLB miss handler, though... - */ -#ifdef CONFIG_DEBUG_PAGEALLOC -void kernel_map_pages(struct page *page, int numpages, int enable) -{ - -} -EXPORT_SYMBOL(kernel_map_pages); -#endif diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 24b1ad5334cb..2bef5261d96d 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -24,6 +24,7 @@ #include <linux/major.h> #include <linux/fcntl.h> #include <linux/mm.h> +#include <linux/seq_file.h> #include <linux/slab.h> #include <linux/capability.h> #include <linux/console.h> @@ -848,38 +849,36 @@ static int rs_open(struct tty_struct *tty, struct file * filp) * /proc fs routines.... */ -static inline int line_info(char *buf, struct serial_state *state) +static inline void line_info(struct seq_file *m, struct serial_state *state) { - return sprintf(buf, "%d: uart:%s port:%lX irq:%d\n", + seq_printf(m, "%d: uart:%s port:%lX irq:%d\n", state->line, uart_config[state->type].name, state->port, state->irq); } -static int rs_read_proc(char *page, char **start, off_t off, int count, - int *eof, void *data) +static int rs_proc_show(struct seq_file *m, void *v) { - int i, len = 0, l; - off_t begin = 0; - - len += sprintf(page, "simserinfo:1.0 driver:%s\n", serial_version); - for (i = 0; i < NR_PORTS && len < 4000; i++) { - l = line_info(page + len, &rs_table[i]); - len += l; - if (len+begin > off+count) - goto done; - if (len+begin < off) { - begin += len; - len = 0; - } - } - *eof = 1; -done: - if (off >= len+begin) - return 0; - *start = page + (begin-off); - return ((count < begin+len-off) ? count : begin+len-off); + int i; + + seq_printf(m, "simserinfo:1.0 driver:%s\n", serial_version); + for (i = 0; i < NR_PORTS; i++) + line_info(m, &rs_table[i]); + return 0; } +static int rs_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, rs_proc_show, NULL); +} + +static const struct file_operations rs_proc_fops = { + .owner = THIS_MODULE, + .open = rs_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* * --------------------------------------------------------------------- * rs_init() and friends @@ -917,7 +916,7 @@ static const struct tty_operations hp_ops = { .start = rs_start, .hangup = rs_hangup, .wait_until_sent = rs_wait_until_sent, - .read_proc = rs_read_proc, + .proc_fops = &rs_proc_fops, }; /* diff --git a/arch/ia64/include/asm/intrinsics.h b/arch/ia64/include/asm/intrinsics.h index c47830e26cb7..111ed5222892 100644 --- a/arch/ia64/include/asm/intrinsics.h +++ b/arch/ia64/include/asm/intrinsics.h @@ -202,7 +202,11 @@ extern long ia64_cmpxchg_called_with_bad_pointer (void); #ifndef __ASSEMBLY__ #if defined(CONFIG_PARAVIRT) && defined(__KERNEL__) -#define IA64_INTRINSIC_API(name) pv_cpu_ops.name +#ifdef ASM_SUPPORTED +# define IA64_INTRINSIC_API(name) paravirt_ ## name +#else +# define IA64_INTRINSIC_API(name) pv_cpu_ops.name +#endif #define IA64_INTRINSIC_MACRO(name) paravirt_ ## name #else #define IA64_INTRINSIC_API(name) ia64_native_ ## name diff --git a/arch/ia64/include/asm/mmu_context.h b/arch/ia64/include/asm/mmu_context.h index 040bc87db930..7f2a456603cb 100644 --- a/arch/ia64/include/asm/mmu_context.h +++ b/arch/ia64/include/asm/mmu_context.h @@ -87,7 +87,7 @@ get_mmu_context (struct mm_struct *mm) /* re-check, now that we've got the lock: */ context = mm->context; if (context == 0) { - cpus_clear(mm->cpu_vm_mask); + cpumask_clear(mm_cpumask(mm)); if (ia64_ctx.next >= ia64_ctx.limit) { ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, ia64_ctx.max_ctx, ia64_ctx.next); @@ -166,8 +166,8 @@ activate_context (struct mm_struct *mm) do { context = get_mmu_context(mm); - if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) - cpu_set(smp_processor_id(), mm->cpu_vm_mask); + if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); reload_context(context); /* * in the unlikely event of a TLB-flush by another thread, diff --git a/arch/ia64/include/asm/module.h b/arch/ia64/include/asm/module.h index d2da61e4c49b..908eaef42a08 100644 --- a/arch/ia64/include/asm/module.h +++ b/arch/ia64/include/asm/module.h @@ -16,6 +16,12 @@ struct mod_arch_specific { struct elf64_shdr *got; /* global offset table */ struct elf64_shdr *opd; /* official procedure descriptors */ struct elf64_shdr *unwind; /* unwind-table section */ +#ifdef CONFIG_PARAVIRT + struct elf64_shdr *paravirt_bundles; + /* paravirt_alt_bundle_patch table */ + struct elf64_shdr *paravirt_insts; + /* paravirt_alt_inst_patch table */ +#endif unsigned long gp; /* global-pointer for module */ void *core_unw_table; /* core unwind-table cookie returned by unwinder */ diff --git a/arch/ia64/include/asm/native/inst.h b/arch/ia64/include/asm/native/inst.h index 0a1026cca4fa..d2d46efb3e6e 100644 --- a/arch/ia64/include/asm/native/inst.h +++ b/arch/ia64/include/asm/native/inst.h @@ -30,6 +30,9 @@ #define __paravirt_work_processed_syscall_target \ ia64_work_processed_syscall +#define paravirt_fsyscall_table ia64_native_fsyscall_table +#define paravirt_fsys_bubble_down ia64_native_fsys_bubble_down + #ifdef CONFIG_PARAVIRT_GUEST_ASM_CLOBBER_CHECK # define PARAVIRT_POISON 0xdeadbeefbaadf00d # define CLOBBER(clob) \ @@ -74,6 +77,11 @@ (pred) mov reg = psr \ CLOBBER(clob) +#define MOV_FROM_ITC(pred, pred_clob, reg, clob) \ +(pred) mov reg = ar.itc \ + CLOBBER(clob) \ + CLOBBER_PRED(pred_clob) + #define MOV_TO_IFA(reg, clob) \ mov cr.ifa = reg \ CLOBBER(clob) @@ -158,6 +166,11 @@ #define RSM_PSR_DT \ rsm psr.dt +#define RSM_PSR_BE_I(clob0, clob1) \ + rsm psr.be | psr.i \ + CLOBBER(clob0) \ + CLOBBER(clob1) + #define SSM_PSR_DT_AND_SRLZ_I \ ssm psr.dt \ ;; \ diff --git a/arch/ia64/include/asm/native/patchlist.h b/arch/ia64/include/asm/native/patchlist.h new file mode 100644 index 000000000000..be16ca9311bf --- /dev/null +++ b/arch/ia64/include/asm/native/patchlist.h @@ -0,0 +1,38 @@ +/****************************************************************************** + * arch/ia64/include/asm/native/inst.h + * + * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#define __paravirt_start_gate_fsyscall_patchlist \ + __ia64_native_start_gate_fsyscall_patchlist +#define __paravirt_end_gate_fsyscall_patchlist \ + __ia64_native_end_gate_fsyscall_patchlist +#define __paravirt_start_gate_brl_fsys_bubble_down_patchlist \ + __ia64_native_start_gate_brl_fsys_bubble_down_patchlist +#define __paravirt_end_gate_brl_fsys_bubble_down_patchlist \ + __ia64_native_end_gate_brl_fsys_bubble_down_patchlist +#define __paravirt_start_gate_vtop_patchlist \ + __ia64_native_start_gate_vtop_patchlist +#define __paravirt_end_gate_vtop_patchlist \ + __ia64_native_end_gate_vtop_patchlist +#define __paravirt_start_gate_mckinley_e9_patchlist \ + __ia64_native_start_gate_mckinley_e9_patchlist +#define __paravirt_end_gate_mckinley_e9_patchlist \ + __ia64_native_end_gate_mckinley_e9_patchlist diff --git a/arch/ia64/include/asm/native/pvchk_inst.h b/arch/ia64/include/asm/native/pvchk_inst.h index b8e6eb1090d7..8d72962ec838 100644 --- a/arch/ia64/include/asm/native/pvchk_inst.h +++ b/arch/ia64/include/asm/native/pvchk_inst.h @@ -180,6 +180,11 @@ IS_PRED_IN(pred) \ IS_RREG_OUT(reg) \ IS_RREG_CLOB(clob) +#define MOV_FROM_ITC(pred, pred_clob, reg, clob) \ + IS_PRED_IN(pred) \ + IS_PRED_CLOB(pred_clob) \ + IS_RREG_OUT(reg) \ + IS_RREG_CLOB(clob) #define MOV_TO_IFA(reg, clob) \ IS_RREG_IN(reg) \ IS_RREG_CLOB(clob) @@ -246,6 +251,9 @@ IS_RREG_CLOB(clob2) #define RSM_PSR_DT \ nop 0 +#define RSM_PSR_BE_I(clob0, clob1) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) #define SSM_PSR_DT_AND_SRLZ_I \ nop 0 #define BSW_0(clob0, clob1, clob2) \ diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h index 2bf3636473fe..2eb0a981a09a 100644 --- a/arch/ia64/include/asm/paravirt.h +++ b/arch/ia64/include/asm/paravirt.h @@ -22,6 +22,56 @@ #ifndef __ASM_PARAVIRT_H #define __ASM_PARAVIRT_H +#ifndef __ASSEMBLY__ +/****************************************************************************** + * fsys related addresses + */ +struct pv_fsys_data { + unsigned long *fsyscall_table; + void *fsys_bubble_down; +}; + +extern struct pv_fsys_data pv_fsys_data; + +unsigned long *paravirt_get_fsyscall_table(void); +char *paravirt_get_fsys_bubble_down(void); + +/****************************************************************************** + * patchlist addresses for gate page + */ +enum pv_gate_patchlist { + PV_GATE_START_FSYSCALL, + PV_GATE_END_FSYSCALL, + + PV_GATE_START_BRL_FSYS_BUBBLE_DOWN, + PV_GATE_END_BRL_FSYS_BUBBLE_DOWN, + + PV_GATE_START_VTOP, + PV_GATE_END_VTOP, + + PV_GATE_START_MCKINLEY_E9, + PV_GATE_END_MCKINLEY_E9, +}; + +struct pv_patchdata { + unsigned long start_fsyscall_patchlist; + unsigned long end_fsyscall_patchlist; + unsigned long start_brl_fsys_bubble_down_patchlist; + unsigned long end_brl_fsys_bubble_down_patchlist; + unsigned long start_vtop_patchlist; + unsigned long end_vtop_patchlist; + unsigned long start_mckinley_e9_patchlist; + unsigned long end_mckinley_e9_patchlist; + + void *gate_section; +}; + +extern struct pv_patchdata pv_patchdata; + +unsigned long paravirt_get_gate_patchlist(enum pv_gate_patchlist type); +void *paravirt_get_gate_section(void); +#endif + #ifdef CONFIG_PARAVIRT_GUEST #define PARAVIRT_HYPERVISOR_TYPE_DEFAULT 0 @@ -68,6 +118,14 @@ struct pv_init_ops { int (*arch_setup_nomca)(void); void (*post_smp_prepare_boot_cpu)(void); + +#ifdef ASM_SUPPORTED + unsigned long (*patch_bundle)(void *sbundle, void *ebundle, + unsigned long type); + unsigned long (*patch_inst)(unsigned long stag, unsigned long etag, + unsigned long type); +#endif + void (*patch_branch)(unsigned long tag, unsigned long type); }; extern struct pv_init_ops pv_init_ops; @@ -210,6 +268,8 @@ struct pv_time_ops { int (*do_steal_accounting)(unsigned long *new_itm); void (*clocksource_resume)(void); + + unsigned long long (*sched_clock)(void); }; extern struct pv_time_ops pv_time_ops; @@ -227,6 +287,11 @@ paravirt_do_steal_accounting(unsigned long *new_itm) return pv_time_ops.do_steal_accounting(new_itm); } +static inline unsigned long long paravirt_sched_clock(void) +{ + return pv_time_ops.sched_clock(); +} + #endif /* !__ASSEMBLY__ */ #else diff --git a/arch/ia64/include/asm/paravirt_patch.h b/arch/ia64/include/asm/paravirt_patch.h new file mode 100644 index 000000000000..128ff5db6e67 --- /dev/null +++ b/arch/ia64/include/asm/paravirt_patch.h @@ -0,0 +1,143 @@ +/****************************************************************************** + * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __ASM_PARAVIRT_PATCH_H +#define __ASM_PARAVIRT_PATCH_H + +#ifdef __ASSEMBLY__ + + .section .paravirt_branches, "a" + .previous +#define PARAVIRT_PATCH_SITE_BR(type) \ + { \ + [1:] ; \ + br.cond.sptk.many 2f ; \ + nop.b 0 ; \ + nop.b 0;; ; \ + } ; \ + 2: \ + .xdata8 ".paravirt_branches", 1b, type + +#else + +#include <linux/stringify.h> +#include <asm/intrinsics.h> + +/* for binary patch */ +struct paravirt_patch_site_bundle { + void *sbundle; + void *ebundle; + unsigned long type; +}; + +/* label means the beginning of new bundle */ +#define paravirt_alt_bundle(instr, privop) \ + "\t998:\n" \ + "\t" instr "\n" \ + "\t999:\n" \ + "\t.pushsection .paravirt_bundles, \"a\"\n" \ + "\t.popsection\n" \ + "\t.xdata8 \".paravirt_bundles\", 998b, 999b, " \ + __stringify(privop) "\n" + + +struct paravirt_patch_bundle_elem { + const void *sbundle; + const void *ebundle; + unsigned long type; +}; + + +struct paravirt_patch_site_inst { + unsigned long stag; + unsigned long etag; + unsigned long type; +}; + +#define paravirt_alt_inst(instr, privop) \ + "\t[998:]\n" \ + "\t" instr "\n" \ + "\t[999:]\n" \ + "\t.pushsection .paravirt_insts, \"a\"\n" \ + "\t.popsection\n" \ + "\t.xdata8 \".paravirt_insts\", 998b, 999b, " \ + __stringify(privop) "\n" + +struct paravirt_patch_site_branch { + unsigned long tag; + unsigned long type; +}; + +struct paravirt_patch_branch_target { + const void *entry; + unsigned long type; +}; + +void +__paravirt_patch_apply_branch( + unsigned long tag, unsigned long type, + const struct paravirt_patch_branch_target *entries, + unsigned int nr_entries); + +void +paravirt_patch_reloc_br(unsigned long tag, const void *target); + +void +paravirt_patch_reloc_brl(unsigned long tag, const void *target); + + +#if defined(ASM_SUPPORTED) && defined(CONFIG_PARAVIRT) +unsigned long +ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type); + +unsigned long +__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type, + const struct paravirt_patch_bundle_elem *elems, + unsigned long nelems, + const struct paravirt_patch_bundle_elem **found); + +void +paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start, + const struct paravirt_patch_site_bundle *end); + +void +paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start, + const struct paravirt_patch_site_inst *end); + +void paravirt_patch_apply(void); +#else +#define paravirt_patch_apply_bundle(start, end) do { } while (0) +#define paravirt_patch_apply_inst(start, end) do { } while (0) +#define paravirt_patch_apply() do { } while (0) +#endif + +#endif /* !__ASSEMBLEY__ */ + +#endif /* __ASM_PARAVIRT_PATCH_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "linux" + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + */ diff --git a/arch/ia64/include/asm/paravirt_privop.h b/arch/ia64/include/asm/paravirt_privop.h index 33c8e55f5775..3d2951130b5f 100644 --- a/arch/ia64/include/asm/paravirt_privop.h +++ b/arch/ia64/include/asm/paravirt_privop.h @@ -33,7 +33,7 @@ */ struct pv_cpu_ops { - void (*fc)(unsigned long addr); + void (*fc)(void *addr); unsigned long (*thash)(unsigned long addr); unsigned long (*get_cpuid)(int index); unsigned long (*get_pmd)(int index); @@ -60,12 +60,18 @@ extern unsigned long ia64_native_getreg_func(int regnum); /* Instructions paravirtualized for performance */ /************************************************/ +#ifndef ASM_SUPPORTED +#define paravirt_ssm_i() pv_cpu_ops.ssm_i() +#define paravirt_rsm_i() pv_cpu_ops.rsm_i() +#define __paravirt_getreg() pv_cpu_ops.getreg() +#endif + /* mask for ia64_native_ssm/rsm() must be constant.("i" constraing). * static inline function doesn't satisfy it. */ #define paravirt_ssm(mask) \ do { \ if ((mask) == IA64_PSR_I) \ - pv_cpu_ops.ssm_i(); \ + paravirt_ssm_i(); \ else \ ia64_native_ssm(mask); \ } while (0) @@ -73,7 +79,7 @@ extern unsigned long ia64_native_getreg_func(int regnum); #define paravirt_rsm(mask) \ do { \ if ((mask) == IA64_PSR_I) \ - pv_cpu_ops.rsm_i(); \ + paravirt_rsm_i(); \ else \ ia64_native_rsm(mask); \ } while (0) @@ -86,7 +92,7 @@ extern unsigned long ia64_native_getreg_func(int regnum); if ((reg) == _IA64_REG_IP) \ res = ia64_native_getreg(_IA64_REG_IP); \ else \ - res = pv_cpu_ops.getreg(reg); \ + res = __paravirt_getreg(reg); \ res; \ }) @@ -112,6 +118,12 @@ void paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch); #endif /* CONFIG_PARAVIRT */ +#if defined(CONFIG_PARAVIRT) && defined(ASM_SUPPORTED) +#define paravirt_dv_serialize_data() ia64_dv_serialize_data() +#else +#define paravirt_dv_serialize_data() /* nothing */ +#endif + /* these routines utilize privilege-sensitive or performance-sensitive * privileged instructions so the code must be replaced with * paravirtualized versions */ @@ -121,4 +133,349 @@ void paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch); IA64_PARAVIRT_ASM_FUNC(work_processed_syscall) #define ia64_leave_kernel IA64_PARAVIRT_ASM_FUNC(leave_kernel) + +#if defined(CONFIG_PARAVIRT) +/****************************************************************************** + * binary patching infrastructure + */ +#define PARAVIRT_PATCH_TYPE_FC 1 +#define PARAVIRT_PATCH_TYPE_THASH 2 +#define PARAVIRT_PATCH_TYPE_GET_CPUID 3 +#define PARAVIRT_PATCH_TYPE_GET_PMD 4 +#define PARAVIRT_PATCH_TYPE_PTCGA 5 +#define PARAVIRT_PATCH_TYPE_GET_RR 6 +#define PARAVIRT_PATCH_TYPE_SET_RR 7 +#define PARAVIRT_PATCH_TYPE_SET_RR0_TO_RR4 8 +#define PARAVIRT_PATCH_TYPE_SSM_I 9 +#define PARAVIRT_PATCH_TYPE_RSM_I 10 +#define PARAVIRT_PATCH_TYPE_GET_PSR_I 11 +#define PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE 12 + +/* PARAVIRT_PATY_TYPE_[GS]ETREG + _IA64_REG_xxx */ +#define PARAVIRT_PATCH_TYPE_GETREG 0x10000000 +#define PARAVIRT_PATCH_TYPE_SETREG 0x20000000 + +/* + * struct task_struct* (*ia64_switch_to)(void* next_task); + * void *ia64_leave_syscall; + * void *ia64_work_processed_syscall + * void *ia64_leave_kernel; + */ + +#define PARAVIRT_PATCH_TYPE_BR_START 0x30000000 +#define PARAVIRT_PATCH_TYPE_BR_SWITCH_TO \ + (PARAVIRT_PATCH_TYPE_BR_START + 0) +#define PARAVIRT_PATCH_TYPE_BR_LEAVE_SYSCALL \ + (PARAVIRT_PATCH_TYPE_BR_START + 1) +#define PARAVIRT_PATCH_TYPE_BR_WORK_PROCESSED_SYSCALL \ + (PARAVIRT_PATCH_TYPE_BR_START + 2) +#define PARAVIRT_PATCH_TYPE_BR_LEAVE_KERNEL \ + (PARAVIRT_PATCH_TYPE_BR_START + 3) + +#ifdef ASM_SUPPORTED +#include <asm/paravirt_patch.h> + +/* + * pv_cpu_ops calling stub. + * normal function call convension can't be written by gcc + * inline assembly. + * + * from the caller's point of view, + * the following registers will be clobbered. + * r2, r3 + * r8-r15 + * r16, r17 + * b6, b7 + * p6-p15 + * ar.ccv + * + * from the callee's point of view , + * the following registers can be used. + * r2, r3: scratch + * r8: scratch, input argument0 and return value + * r0-r15: scratch, input argument1-5 + * b6: return pointer + * b7: scratch + * p6-p15: scratch + * ar.ccv: scratch + * + * other registers must not be changed. especially + * b0: rp: preserved. gcc ignores b0 in clobbered register. + * r16: saved gp + */ +/* 5 bundles */ +#define __PARAVIRT_BR \ + ";;\n" \ + "{ .mlx\n" \ + "nop 0\n" \ + "movl r2 = %[op_addr]\n"/* get function pointer address */ \ + ";;\n" \ + "}\n" \ + "1:\n" \ + "{ .mii\n" \ + "ld8 r2 = [r2]\n" /* load function descriptor address */ \ + "mov r17 = ip\n" /* get ip to calc return address */ \ + "mov r16 = gp\n" /* save gp */ \ + ";;\n" \ + "}\n" \ + "{ .mii\n" \ + "ld8 r3 = [r2], 8\n" /* load entry address */ \ + "adds r17 = 1f - 1b, r17\n" /* calculate return address */ \ + ";;\n" \ + "mov b7 = r3\n" /* set entry address */ \ + "}\n" \ + "{ .mib\n" \ + "ld8 gp = [r2]\n" /* load gp value */ \ + "mov b6 = r17\n" /* set return address */ \ + "br.cond.sptk.few b7\n" /* intrinsics are very short isns */ \ + "}\n" \ + "1:\n" \ + "{ .mii\n" \ + "mov gp = r16\n" /* restore gp value */ \ + "nop 0\n" \ + "nop 0\n" \ + ";;\n" \ + "}\n" + +#define PARAVIRT_OP(op) \ + [op_addr] "i"(&pv_cpu_ops.op) + +#define PARAVIRT_TYPE(type) \ + PARAVIRT_PATCH_TYPE_ ## type + +#define PARAVIRT_REG_CLOBBERS0 \ + "r2", "r3", /*"r8",*/ "r9", "r10", "r11", "r14", \ + "r15", "r16", "r17" + +#define PARAVIRT_REG_CLOBBERS1 \ + "r2","r3", /*"r8",*/ "r9", "r10", "r11", "r14", \ + "r15", "r16", "r17" + +#define PARAVIRT_REG_CLOBBERS2 \ + "r2", "r3", /*"r8", "r9",*/ "r10", "r11", "r14", \ + "r15", "r16", "r17" + +#define PARAVIRT_REG_CLOBBERS5 \ + "r2", "r3", /*"r8", "r9", "r10", "r11", "r14",*/ \ + "r15", "r16", "r17" + +#define PARAVIRT_BR_CLOBBERS \ + "b6", "b7" + +#define PARAVIRT_PR_CLOBBERS \ + "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15" + +#define PARAVIRT_AR_CLOBBERS \ + "ar.ccv" + +#define PARAVIRT_CLOBBERS0 \ + PARAVIRT_REG_CLOBBERS0, \ + PARAVIRT_BR_CLOBBERS, \ + PARAVIRT_PR_CLOBBERS, \ + PARAVIRT_AR_CLOBBERS, \ + "memory" + +#define PARAVIRT_CLOBBERS1 \ + PARAVIRT_REG_CLOBBERS1, \ + PARAVIRT_BR_CLOBBERS, \ + PARAVIRT_PR_CLOBBERS, \ + PARAVIRT_AR_CLOBBERS, \ + "memory" + +#define PARAVIRT_CLOBBERS2 \ + PARAVIRT_REG_CLOBBERS2, \ + PARAVIRT_BR_CLOBBERS, \ + PARAVIRT_PR_CLOBBERS, \ + PARAVIRT_AR_CLOBBERS, \ + "memory" + +#define PARAVIRT_CLOBBERS5 \ + PARAVIRT_REG_CLOBBERS5, \ + PARAVIRT_BR_CLOBBERS, \ + PARAVIRT_PR_CLOBBERS, \ + PARAVIRT_AR_CLOBBERS, \ + "memory" + +#define PARAVIRT_BR0(op, type) \ + register unsigned long ia64_clobber asm ("r8"); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_clobber) \ + : PARAVIRT_OP(op) \ + : PARAVIRT_CLOBBERS0) + +#define PARAVIRT_BR0_RET(op, type) \ + register unsigned long ia64_intri_res asm ("r8"); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_intri_res) \ + : PARAVIRT_OP(op) \ + : PARAVIRT_CLOBBERS0) + +#define PARAVIRT_BR1(op, type, arg1) \ + register unsigned long __##arg1 asm ("r8") = arg1; \ + register unsigned long ia64_clobber asm ("r8"); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_clobber) \ + : PARAVIRT_OP(op), "0"(__##arg1) \ + : PARAVIRT_CLOBBERS1) + +#define PARAVIRT_BR1_RET(op, type, arg1) \ + register unsigned long ia64_intri_res asm ("r8"); \ + register unsigned long __##arg1 asm ("r8") = arg1; \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_intri_res) \ + : PARAVIRT_OP(op), "0"(__##arg1) \ + : PARAVIRT_CLOBBERS1) + +#define PARAVIRT_BR1_VOID(op, type, arg1) \ + register void *__##arg1 asm ("r8") = arg1; \ + register unsigned long ia64_clobber asm ("r8"); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_clobber) \ + : PARAVIRT_OP(op), "0"(__##arg1) \ + : PARAVIRT_CLOBBERS1) + +#define PARAVIRT_BR2(op, type, arg1, arg2) \ + register unsigned long __##arg1 asm ("r8") = arg1; \ + register unsigned long __##arg2 asm ("r9") = arg2; \ + register unsigned long ia64_clobber1 asm ("r8"); \ + register unsigned long ia64_clobber2 asm ("r9"); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_clobber1), "=r"(ia64_clobber2) \ + : PARAVIRT_OP(op), "0"(__##arg1), "1"(__##arg2) \ + : PARAVIRT_CLOBBERS2) + + +#define PARAVIRT_DEFINE_CPU_OP0(op, type) \ + static inline void \ + paravirt_ ## op (void) \ + { \ + PARAVIRT_BR0(op, type); \ + } + +#define PARAVIRT_DEFINE_CPU_OP0_RET(op, type) \ + static inline unsigned long \ + paravirt_ ## op (void) \ + { \ + PARAVIRT_BR0_RET(op, type); \ + return ia64_intri_res; \ + } + +#define PARAVIRT_DEFINE_CPU_OP1_VOID(op, type) \ + static inline void \ + paravirt_ ## op (void *arg1) \ + { \ + PARAVIRT_BR1_VOID(op, type, arg1); \ + } + +#define PARAVIRT_DEFINE_CPU_OP1(op, type) \ + static inline void \ + paravirt_ ## op (unsigned long arg1) \ + { \ + PARAVIRT_BR1(op, type, arg1); \ + } + +#define PARAVIRT_DEFINE_CPU_OP1_RET(op, type) \ + static inline unsigned long \ + paravirt_ ## op (unsigned long arg1) \ + { \ + PARAVIRT_BR1_RET(op, type, arg1); \ + return ia64_intri_res; \ + } + +#define PARAVIRT_DEFINE_CPU_OP2(op, type) \ + static inline void \ + paravirt_ ## op (unsigned long arg1, \ + unsigned long arg2) \ + { \ + PARAVIRT_BR2(op, type, arg1, arg2); \ + } + + +PARAVIRT_DEFINE_CPU_OP1_VOID(fc, FC); +PARAVIRT_DEFINE_CPU_OP1_RET(thash, THASH) +PARAVIRT_DEFINE_CPU_OP1_RET(get_cpuid, GET_CPUID) +PARAVIRT_DEFINE_CPU_OP1_RET(get_pmd, GET_PMD) +PARAVIRT_DEFINE_CPU_OP2(ptcga, PTCGA) +PARAVIRT_DEFINE_CPU_OP1_RET(get_rr, GET_RR) +PARAVIRT_DEFINE_CPU_OP2(set_rr, SET_RR) +PARAVIRT_DEFINE_CPU_OP0(ssm_i, SSM_I) +PARAVIRT_DEFINE_CPU_OP0(rsm_i, RSM_I) +PARAVIRT_DEFINE_CPU_OP0_RET(get_psr_i, GET_PSR_I) +PARAVIRT_DEFINE_CPU_OP1(intrin_local_irq_restore, INTRIN_LOCAL_IRQ_RESTORE) + +static inline void +paravirt_set_rr0_to_rr4(unsigned long val0, unsigned long val1, + unsigned long val2, unsigned long val3, + unsigned long val4) +{ + register unsigned long __val0 asm ("r8") = val0; + register unsigned long __val1 asm ("r9") = val1; + register unsigned long __val2 asm ("r10") = val2; + register unsigned long __val3 asm ("r11") = val3; + register unsigned long __val4 asm ("r14") = val4; + + register unsigned long ia64_clobber0 asm ("r8"); + register unsigned long ia64_clobber1 asm ("r9"); + register unsigned long ia64_clobber2 asm ("r10"); + register unsigned long ia64_clobber3 asm ("r11"); + register unsigned long ia64_clobber4 asm ("r14"); + + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, + PARAVIRT_TYPE(SET_RR0_TO_RR4)) + : "=r"(ia64_clobber0), + "=r"(ia64_clobber1), + "=r"(ia64_clobber2), + "=r"(ia64_clobber3), + "=r"(ia64_clobber4) + : PARAVIRT_OP(set_rr0_to_rr4), + "0"(__val0), "1"(__val1), "2"(__val2), + "3"(__val3), "4"(__val4) + : PARAVIRT_CLOBBERS5); +} + +/* unsigned long paravirt_getreg(int reg) */ +#define __paravirt_getreg(reg) \ + ({ \ + register unsigned long ia64_intri_res asm ("r8"); \ + register unsigned long __reg asm ("r8") = (reg); \ + \ + BUILD_BUG_ON(!__builtin_constant_p(reg)); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(GETREG) \ + + (reg)) \ + : "=r"(ia64_intri_res) \ + : PARAVIRT_OP(getreg), "0"(__reg) \ + : PARAVIRT_CLOBBERS1); \ + \ + ia64_intri_res; \ + }) + +/* void paravirt_setreg(int reg, unsigned long val) */ +#define paravirt_setreg(reg, val) \ + do { \ + register unsigned long __val asm ("r8") = val; \ + register unsigned long __reg asm ("r9") = reg; \ + register unsigned long ia64_clobber1 asm ("r8"); \ + register unsigned long ia64_clobber2 asm ("r9"); \ + \ + BUILD_BUG_ON(!__builtin_constant_p(reg)); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(SETREG) \ + + (reg)) \ + : "=r"(ia64_clobber1), \ + "=r"(ia64_clobber2) \ + : PARAVIRT_OP(setreg), \ + "1"(__reg), "0"(__val) \ + : PARAVIRT_CLOBBERS2); \ + } while (0) + +#endif /* ASM_SUPPORTED */ +#endif /* CONFIG_PARAVIRT && ASM_SUPPOTED */ + #endif /* _ASM_IA64_PARAVIRT_PRIVOP_H */ diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h index 21c402365d0e..598408336251 100644 --- a/arch/ia64/include/asm/smp.h +++ b/arch/ia64/include/asm/smp.h @@ -126,7 +126,8 @@ extern void identify_siblings (struct cpuinfo_ia64 *); extern int is_multithreading_enabled(void); extern void arch_send_call_function_single_ipi(int cpu); -extern void arch_send_call_function_ipi(cpumask_t mask); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); +#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask #else /* CONFIG_SMP */ diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h index 4e03cfe74a0c..86c7db861180 100644 --- a/arch/ia64/include/asm/timex.h +++ b/arch/ia64/include/asm/timex.h @@ -40,5 +40,6 @@ get_cycles (void) } extern void ia64_cpu_local_tick (void); +extern unsigned long long ia64_native_sched_clock (void); #endif /* _ASM_IA64_TIMEX_H */ diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index f260dcf21515..7b4c8c70b2d1 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -112,11 +112,6 @@ void build_cpu_to_node_map(void); extern void arch_fix_phys_package_id(int num, u32 slot); -#define pcibus_to_cpumask(bus) (pcibus_to_node(bus) == -1 ? \ - CPU_MASK_ALL : \ - node_to_cpumask(pcibus_to_node(bus)) \ - ) - #define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ cpu_all_mask : \ cpumask_of_node(pcibus_to_node(bus))) diff --git a/arch/ia64/include/asm/xen/hypervisor.h b/arch/ia64/include/asm/xen/hypervisor.h index 7a804e80fc67..e425227a418e 100644 --- a/arch/ia64/include/asm/xen/hypervisor.h +++ b/arch/ia64/include/asm/xen/hypervisor.h @@ -33,9 +33,6 @@ #ifndef _ASM_IA64_XEN_HYPERVISOR_H #define _ASM_IA64_XEN_HYPERVISOR_H -#ifdef CONFIG_XEN - -#include <linux/init.h> #include <xen/interface/xen.h> #include <xen/interface/version.h> /* to compile feature.c */ #include <xen/features.h> /* to comiple xen-netfront.c */ @@ -43,22 +40,32 @@ /* xen_domain_type is set before executing any C code by early_xen_setup */ enum xen_domain_type { - XEN_NATIVE, - XEN_PV_DOMAIN, - XEN_HVM_DOMAIN, + XEN_NATIVE, /* running on bare hardware */ + XEN_PV_DOMAIN, /* running in a PV domain */ + XEN_HVM_DOMAIN, /* running in a Xen hvm domain*/ }; +#ifdef CONFIG_XEN extern enum xen_domain_type xen_domain_type; +#else +#define xen_domain_type XEN_NATIVE +#endif #define xen_domain() (xen_domain_type != XEN_NATIVE) -#define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) -#define xen_initial_domain() (xen_pv_domain() && \ +#define xen_pv_domain() (xen_domain() && \ + xen_domain_type == XEN_PV_DOMAIN) +#define xen_hvm_domain() (xen_domain() && \ + xen_domain_type == XEN_HVM_DOMAIN) + +#ifdef CONFIG_XEN_DOM0 +#define xen_initial_domain() (xen_pv_domain() && \ (xen_start_info->flags & SIF_INITDOMAIN)) -#define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN) +#else +#define xen_initial_domain() (0) +#endif -/* deprecated. remove this */ -#define is_running_on_xen() (xen_domain_type == XEN_PV_DOMAIN) +#ifdef CONFIG_XEN extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; @@ -74,16 +81,6 @@ void force_evtchn_callback(void); /* For setup_arch() in arch/ia64/kernel/setup.c */ void xen_ia64_enable_opt_feature(void); - -#else /* CONFIG_XEN */ - -#define xen_domain() (0) -#define xen_pv_domain() (0) -#define xen_initial_domain() (0) -#define xen_hvm_domain() (0) -#define is_running_on_xen() (0) /* deprecated. remove this */ #endif -#define is_initial_xendomain() (0) /* deprecated. remove this */ - #endif /* _ASM_IA64_XEN_HYPERVISOR_H */ diff --git a/arch/ia64/include/asm/xen/inst.h b/arch/ia64/include/asm/xen/inst.h index 19c2ae1d878a..c53a47611208 100644 --- a/arch/ia64/include/asm/xen/inst.h +++ b/arch/ia64/include/asm/xen/inst.h @@ -33,6 +33,9 @@ #define __paravirt_work_processed_syscall_target \ xen_work_processed_syscall +#define paravirt_fsyscall_table xen_fsyscall_table +#define paravirt_fsys_bubble_down xen_fsys_bubble_down + #define MOV_FROM_IFA(reg) \ movl reg = XSI_IFA; \ ;; \ @@ -110,6 +113,27 @@ .endm #define MOV_FROM_PSR(pred, reg, clob) __MOV_FROM_PSR pred, reg, clob +/* assuming ar.itc is read with interrupt disabled. */ +#define MOV_FROM_ITC(pred, pred_clob, reg, clob) \ +(pred) movl clob = XSI_ITC_OFFSET; \ + ;; \ +(pred) ld8 clob = [clob]; \ +(pred) mov reg = ar.itc; \ + ;; \ +(pred) add reg = reg, clob; \ + ;; \ +(pred) movl clob = XSI_ITC_LAST; \ + ;; \ +(pred) ld8 clob = [clob]; \ + ;; \ +(pred) cmp.geu.unc pred_clob, p0 = clob, reg; \ + ;; \ +(pred_clob) add reg = 1, clob; \ + ;; \ +(pred) movl clob = XSI_ITC_LAST; \ + ;; \ +(pred) st8 [clob] = reg + #define MOV_TO_IFA(reg, clob) \ movl clob = XSI_IFA; \ @@ -362,6 +386,10 @@ #define RSM_PSR_DT \ XEN_HYPER_RSM_PSR_DT +#define RSM_PSR_BE_I(clob0, clob1) \ + RSM_PSR_I(p0, clob0, clob1); \ + rum psr.be + #define SSM_PSR_DT_AND_SRLZ_I \ XEN_HYPER_SSM_PSR_DT diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h index f00fab40854d..e951e740bdf2 100644 --- a/arch/ia64/include/asm/xen/interface.h +++ b/arch/ia64/include/asm/xen/interface.h @@ -209,6 +209,15 @@ struct mapped_regs { unsigned long krs[8]; /* kernel registers */ unsigned long tmp[16]; /* temp registers (e.g. for hyperprivops) */ + + /* itc paravirtualization + * vAR.ITC = mAR.ITC + itc_offset + * itc_last is one which was lastly passed to + * the guest OS in order to prevent it from + * going backwords. + */ + unsigned long itc_offset; + unsigned long itc_last; }; }; }; diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h index 4d92d9bbda7b..c57fa910f2c9 100644 --- a/arch/ia64/include/asm/xen/minstate.h +++ b/arch/ia64/include/asm/xen/minstate.h @@ -1,3 +1,12 @@ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +/* read ar.itc in advance, and use it before leaving bank 0 */ +#define XEN_ACCOUNT_GET_STAMP \ + MOV_FROM_ITC(pUStk, p6, r20, r2); +#else +#define XEN_ACCOUNT_GET_STAMP +#endif + /* * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves * the minimum state necessary that allows us to turn psr.ic back @@ -123,7 +132,7 @@ ;; \ .mem.offset 0,0; st8.spill [r16]=r2,16; \ .mem.offset 8,0; st8.spill [r17]=r3,16; \ - ACCOUNT_GET_STAMP \ + XEN_ACCOUNT_GET_STAMP \ adds r2=IA64_PT_REGS_R16_OFFSET,r1; \ ;; \ EXTRA; \ diff --git a/arch/ia64/include/asm/xen/patchlist.h b/arch/ia64/include/asm/xen/patchlist.h new file mode 100644 index 000000000000..eae944e88846 --- /dev/null +++ b/arch/ia64/include/asm/xen/patchlist.h @@ -0,0 +1,38 @@ +/****************************************************************************** + * arch/ia64/include/asm/xen/patchlist.h + * + * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#define __paravirt_start_gate_fsyscall_patchlist \ + __xen_start_gate_fsyscall_patchlist +#define __paravirt_end_gate_fsyscall_patchlist \ + __xen_end_gate_fsyscall_patchlist +#define __paravirt_start_gate_brl_fsys_bubble_down_patchlist \ + __xen_start_gate_brl_fsys_bubble_down_patchlist +#define __paravirt_end_gate_brl_fsys_bubble_down_patchlist \ + __xen_end_gate_brl_fsys_bubble_down_patchlist +#define __paravirt_start_gate_vtop_patchlist \ + __xen_start_gate_vtop_patchlist +#define __paravirt_end_gate_vtop_patchlist \ + __xen_end_gate_vtop_patchlist +#define __paravirt_start_gate_mckinley_e9_patchlist \ + __xen_start_gate_mckinley_e9_patchlist +#define __paravirt_end_gate_mckinley_e9_patchlist \ + __xen_end_gate_mckinley_e9_patchlist diff --git a/arch/ia64/include/asm/xen/privop.h b/arch/ia64/include/asm/xen/privop.h index 71ec7546e100..fb4ec5e0b066 100644 --- a/arch/ia64/include/asm/xen/privop.h +++ b/arch/ia64/include/asm/xen/privop.h @@ -55,6 +55,8 @@ #define XSI_BANK1_R16 (XSI_BASE + XSI_BANK1_R16_OFS) #define XSI_BANKNUM (XSI_BASE + XSI_BANKNUM_OFS) #define XSI_IHA (XSI_BASE + XSI_IHA_OFS) +#define XSI_ITC_OFFSET (XSI_BASE + XSI_ITC_OFFSET_OFS) +#define XSI_ITC_LAST (XSI_BASE + XSI_ITC_LAST_OFS) #endif #ifndef __ASSEMBLY__ @@ -67,7 +69,7 @@ * may have different semantics depending on whether they are executed * at PL0 vs PL!=0. When paravirtualized, these instructions mustn't * be allowed to execute directly, lest incorrect semantics result. */ -extern void xen_fc(unsigned long addr); +extern void xen_fc(void *addr); extern unsigned long xen_thash(unsigned long addr); /* Note that "ttag" and "cover" are also privilege-sensitive; "ttag" @@ -80,8 +82,10 @@ extern unsigned long xen_thash(unsigned long addr); extern unsigned long xen_get_cpuid(int index); extern unsigned long xen_get_pmd(int index); +#ifndef ASM_SUPPORTED extern unsigned long xen_get_eflag(void); /* see xen_ia64_getreg */ extern void xen_set_eflag(unsigned long); /* see xen_ia64_setreg */ +#endif /************************************************/ /* Instructions paravirtualized for performance */ @@ -106,6 +110,7 @@ extern void xen_set_eflag(unsigned long); /* see xen_ia64_setreg */ #define xen_get_virtual_pend() \ (*(((uint8_t *)XEN_MAPPEDREGS->interrupt_mask_addr) - 1)) +#ifndef ASM_SUPPORTED /* Although all privileged operations can be left to trap and will * be properly handled by Xen, some are frequent enough that we use * hyperprivops for performance. */ @@ -123,6 +128,7 @@ extern void xen_set_rr0_to_rr4(unsigned long val0, unsigned long val1, unsigned long val4); extern void xen_set_kr(unsigned long index, unsigned long val); extern void xen_ptcga(unsigned long addr, unsigned long size); +#endif /* !ASM_SUPPORTED */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index f2778f2c4fd9..5628e9a990a6 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -5,7 +5,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ - irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \ + irq_lsapic.o ivt.o machvec.o pal.o paravirt_patchlist.o patch.o process.o perfmon.o ptrace.o sal.o \ salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ unwind.o mca.o mca_asm.o topology.o dma-mapping.o @@ -36,7 +36,8 @@ obj-$(CONFIG_PCI_MSI) += msi_ia64.o mca_recovery-y += mca_drv.o mca_drv_asm.o obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o -obj-$(CONFIG_PARAVIRT) += paravirt.o paravirtentry.o +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirtentry.o \ + paravirt_patch.o obj-$(CONFIG_IA64_ESI) += esi.o ifneq ($(CONFIG_IA64_ESI),) @@ -45,35 +46,13 @@ endif obj-$(CONFIG_DMAR) += pci-dma.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o -# The gate DSO image is built using a special linker script. -targets += gate.so gate-syms.o - -extra-y += gate.so gate-syms.o gate.lds gate.o - # fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 -CPPFLAGS_gate.lds := -P -C -U$(ARCH) - -quiet_cmd_gate = GATE $@ - cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@ - -GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) -$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE - $(call if_changed,gate) - -$(obj)/built-in.o: $(obj)/gate-syms.o -$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o - -GATECFLAGS_gate-syms.o = -r -$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE - $(call if_changed,gate) - -# gate-data.o contains the gate DSO image as data in section .data.gate. -# We must build gate.so before we can assemble it. -# Note: kbuild does not track this dependency due to usage of .incbin -$(obj)/gate-data.o: $(obj)/gate.so +# The gate DSO image is built using a special linker script. +include $(srctree)/arch/ia64/kernel/Makefile.gate +# tell compiled for native +CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_NATIVE # Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config define sed-y @@ -109,9 +88,9 @@ include/asm-ia64/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s clean-files += $(objtree)/include/asm-ia64/nr-irqs.h # -# native ivt.S and entry.S +# native ivt.S, entry.S and fsys.S # -ASM_PARAVIRT_OBJS = ivt.o entry.o +ASM_PARAVIRT_OBJS = ivt.o entry.o fsys.o define paravirtualized_native AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE AFLAGS_pvchk-sed-$(1) += -D__IA64_ASM_PARAVIRTUALIZED_PVCHECK diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate new file mode 100644 index 000000000000..1d87f84069b3 --- /dev/null +++ b/arch/ia64/kernel/Makefile.gate @@ -0,0 +1,27 @@ +# The gate DSO image is built using a special linker script. + +targets += gate.so gate-syms.o + +extra-y += gate.so gate-syms.o gate.lds gate.o + +CPPFLAGS_gate.lds := -P -C -U$(ARCH) + +quiet_cmd_gate = GATE $@ + cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@ + +GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \ + $(call ld-option, -Wl$(comma)--hash-style=sysv) +$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE + $(call if_changed,gate) + +$(obj)/built-in.o: $(obj)/gate-syms.o +$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o + +GATECFLAGS_gate-syms.o = -r +$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE + $(call if_changed,gate) + +# gate-data.o contains the gate DSO image as data in section .data.gate. +# We must build gate.so before we can assemble it. +# Note: kbuild does not track this dependency due to usage of .incbin +$(obj)/gate-data.o: $(obj)/gate.so diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index bdef2ce38c8b..5510317db37b 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -890,7 +890,7 @@ __init void prefill_possible_map(void) possible, max((possible - available_cpus), 0)); for (i = 0; i < possible; i++) - cpu_set(i, cpu_possible_map); + set_cpu_possible(i, true); } int acpi_map_lsapic(acpi_handle handle, int *pcpu) @@ -928,9 +928,9 @@ int acpi_map_lsapic(acpi_handle handle, int *pcpu) buffer.length = ACPI_ALLOCATE_BUFFER; buffer.pointer = NULL; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); - if (cpu >= NR_CPUS) + cpumask_complement(&tmp_map, cpu_present_mask); + cpu = cpumask_first(&tmp_map); + if (cpu >= nr_cpu_ids) return -EINVAL; acpi_map_cpu2node(handle, cpu, physid); diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index 742dbb1d5a4f..af5650169043 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -316,5 +316,7 @@ void foo(void) DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]); DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat); DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat); + DEFINE_MAPPED_REG_OFS(XSI_ITC_OFFSET_OFS, itc_offset); + DEFINE_MAPPED_REG_OFS(XSI_ITC_LAST_OFS, itc_last); #endif /* CONFIG_XEN */ } diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index efaff15d8cf1..7ef80e8161ce 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -456,6 +456,7 @@ efi_map_pal_code (void) GRANULEROUNDDOWN((unsigned long) pal_vaddr), pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)), IA64_GRANULE_SHIFT); + paravirt_dv_serialize_data(); ia64_set_psr(psr); /* restore psr */ } diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index e5341e2c1175..ccfdeee9d89f 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -735,7 +735,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall) __paravirt_work_processed_syscall: #ifdef CONFIG_VIRT_CPU_ACCOUNTING adds r2=PT(LOADRS)+16,r12 -(pUStk) mov.m r22=ar.itc // fetch time at leave + MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 ;; (p6) ld4 r31=[r18] // load current_thread_info()->flags @@ -984,7 +984,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) #ifdef CONFIG_VIRT_CPU_ACCOUNTING .pred.rel.mutex pUStk,pKStk MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled -(pUStk) mov.m r22=ar.itc // M fetch time at leave + MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave nop.i 0 ;; #else diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index c1625c7e1779..3567d54f8cee 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -25,6 +25,7 @@ #include <asm/unistd.h> #include "entry.h" +#include "paravirt_inst.h" /* * See Documentation/ia64/fsys.txt for details on fsyscalls. @@ -279,7 +280,7 @@ ENTRY(fsys_gettimeofday) (p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control ;; .pred.rel.mutex p8,p9 -(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! + MOV_FROM_ITC(p8, p6, r2, r10) // CPU_TIMER. 36 clocks latency!!! (p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. (p13) ld8 r25 = [r19] // get itc_lastcycle value ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec @@ -418,7 +419,7 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1)) ;; - rsm psr.i // mask interrupt delivery + RSM_PSR_I(p0, r18, r19) // mask interrupt delivery mov ar.ccv=0 andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP @@ -491,7 +492,7 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set #ifdef CONFIG_SMP st4.rel [r31]=r0 // release the lock #endif - ssm psr.i + SSM_PSR_I(p0, p9, r31) ;; srlz.d // ensure psr.i is set again @@ -513,7 +514,7 @@ EX(.fail_efault, (p15) st8 [r34]=r3) #ifdef CONFIG_SMP st4.rel [r31]=r0 // release the lock #endif - ssm psr.i + SSM_PSR_I(p0, p9, r17) ;; srlz.d br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall @@ -521,7 +522,7 @@ EX(.fail_efault, (p15) st8 [r34]=r3) #ifdef CONFIG_SMP .lock_contention: /* Rather than spinning here, fall back on doing a heavy-weight syscall. */ - ssm psr.i + SSM_PSR_I(p0, p9, r17) ;; srlz.d br.sptk.many fsys_fallback_syscall @@ -592,17 +593,17 @@ ENTRY(fsys_fallback_syscall) adds r17=-1024,r15 movl r14=sys_call_table ;; - rsm psr.i + RSM_PSR_I(p0, r26, r27) shladd r18=r17,3,r14 ;; ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point - mov r29=psr // read psr (12 cyc load latency) + MOV_FROM_PSR(p0, r29, r26) // read psr (12 cyc load latency) mov r27=ar.rsc mov r21=ar.fpsr mov r26=ar.pfs END(fsys_fallback_syscall) /* FALL THROUGH */ -GLOBAL_ENTRY(fsys_bubble_down) +GLOBAL_ENTRY(paravirt_fsys_bubble_down) .prologue .altrp b6 .body @@ -640,7 +641,7 @@ GLOBAL_ENTRY(fsys_bubble_down) * * PSR.BE : already is turned off in __kernel_syscall_via_epc() * PSR.AC : don't care (kernel normally turns PSR.AC on) - * PSR.I : already turned off by the time fsys_bubble_down gets + * PSR.I : already turned off by the time paravirt_fsys_bubble_down gets * invoked * PSR.DFL: always 0 (kernel never turns it on) * PSR.DFH: don't care --- kernel never touches f32-f127 on its own @@ -650,7 +651,7 @@ GLOBAL_ENTRY(fsys_bubble_down) * PSR.DB : don't care --- kernel never enables kernel-level * breakpoints * PSR.TB : must be 0 already; if it wasn't zero on entry to - * __kernel_syscall_via_epc, the branch to fsys_bubble_down + * __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down * will trigger a taken branch; the taken-trap-handler then * converts the syscall into a break-based system-call. */ @@ -683,7 +684,7 @@ GLOBAL_ENTRY(fsys_bubble_down) ;; mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 #ifdef CONFIG_VIRT_CPU_ACCOUNTING - mov.m r30=ar.itc // M get cycle for accounting + MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting #else nop.m 0 #endif @@ -734,21 +735,21 @@ GLOBAL_ENTRY(fsys_bubble_down) mov rp=r14 // I0 set the real return addr and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A ;; - ssm psr.i // M2 we're on kernel stacks now, reenable irqs + SSM_PSR_I(p0, p6, r22) // M2 we're on kernel stacks now, reenable irqs cmp.eq p8,p0=r3,r0 // A (p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT nop.m 0 (p8) br.call.sptk.many b6=b6 // B (ignore return address) br.cond.spnt ia64_trace_syscall // B -END(fsys_bubble_down) +END(paravirt_fsys_bubble_down) .rodata .align 8 - .globl fsyscall_table + .globl paravirt_fsyscall_table - data8 fsys_bubble_down -fsyscall_table: + data8 paravirt_fsys_bubble_down +paravirt_fsyscall_table: data8 fsys_ni_syscall data8 0 // exit // 1025 data8 0 // read @@ -1033,4 +1034,4 @@ fsyscall_table: // fill in zeros for the remaining entries .zero: - .space fsyscall_table + 8*NR_syscalls - .zero, 0 + .space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0 diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S index 74b1ccce4e84..cf5e0a105e16 100644 --- a/arch/ia64/kernel/gate.S +++ b/arch/ia64/kernel/gate.S @@ -13,6 +13,7 @@ #include <asm/sigcontext.h> #include <asm/system.h> #include <asm/unistd.h> +#include "paravirt_inst.h" /* * We can't easily refer to symbols inside the kernel. To avoid full runtime relocation, @@ -48,87 +49,6 @@ GLOBAL_ENTRY(__kernel_syscall_via_break) } END(__kernel_syscall_via_break) -/* - * On entry: - * r11 = saved ar.pfs - * r15 = system call # - * b0 = saved return address - * b6 = return address - * On exit: - * r11 = saved ar.pfs - * r15 = system call # - * b0 = saved return address - * all other "scratch" registers: undefined - * all "preserved" registers: same as on entry - */ - -GLOBAL_ENTRY(__kernel_syscall_via_epc) - .prologue - .altrp b6 - .body -{ - /* - * Note: the kernel cannot assume that the first two instructions in this - * bundle get executed. The remaining code must be safe even if - * they do not get executed. - */ - adds r17=-1024,r15 // A - mov r10=0 // A default to successful syscall execution - epc // B causes split-issue -} - ;; - rsm psr.be | psr.i // M2 (5 cyc to srlz.d) - LOAD_FSYSCALL_TABLE(r14) // X - ;; - mov r16=IA64_KR(CURRENT) // M2 (12 cyc) - shladd r18=r17,3,r14 // A - mov r19=NR_syscalls-1 // A - ;; - lfetch [r18] // M0|1 - mov r29=psr // M2 (12 cyc) - // If r17 is a NaT, p6 will be zero - cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)? - ;; - mov r21=ar.fpsr // M2 (12 cyc) - tnat.nz p10,p9=r15 // I0 - mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...) - ;; - srlz.d // M0 (forces split-issue) ensure PSR.BE==0 -(p6) ld8 r18=[r18] // M0|1 - nop.i 0 - ;; - nop.m 0 -(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!) - nop.i 0 - ;; -(p8) ssm psr.i -(p6) mov b7=r18 // I0 -(p8) br.dptk.many b7 // B - - mov r27=ar.rsc // M2 (12 cyc) -/* - * brl.cond doesn't work as intended because the linker would convert this branch - * into a branch to a PLT. Perhaps there will be a way to avoid this with some - * future version of the linker. In the meantime, we just use an indirect branch - * instead. - */ -#ifdef CONFIG_ITANIUM -(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry - ;; -(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down - ;; -(p6) mov b7=r14 -(p6) br.sptk.many b7 -#else - BRL_COND_FSYS_BUBBLE_DOWN(p6) -#endif - ssm psr.i - mov r10=-1 -(p10) mov r8=EINVAL -(p9) mov r8=ENOSYS - FSYS_RETURN -END(__kernel_syscall_via_epc) - # define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) # define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET) # define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET) @@ -374,3 +294,92 @@ restore_rbs: // invala not necessary as that will happen when returning to user-mode br.cond.sptk back_from_restore_rbs END(__kernel_sigtramp) + +/* + * On entry: + * r11 = saved ar.pfs + * r15 = system call # + * b0 = saved return address + * b6 = return address + * On exit: + * r11 = saved ar.pfs + * r15 = system call # + * b0 = saved return address + * all other "scratch" registers: undefined + * all "preserved" registers: same as on entry + */ + +GLOBAL_ENTRY(__kernel_syscall_via_epc) + .prologue + .altrp b6 + .body +{ + /* + * Note: the kernel cannot assume that the first two instructions in this + * bundle get executed. The remaining code must be safe even if + * they do not get executed. + */ + adds r17=-1024,r15 // A + mov r10=0 // A default to successful syscall execution + epc // B causes split-issue +} + ;; + RSM_PSR_BE_I(r20, r22) // M2 (5 cyc to srlz.d) + LOAD_FSYSCALL_TABLE(r14) // X + ;; + mov r16=IA64_KR(CURRENT) // M2 (12 cyc) + shladd r18=r17,3,r14 // A + mov r19=NR_syscalls-1 // A + ;; + lfetch [r18] // M0|1 + MOV_FROM_PSR(p0, r29, r8) // M2 (12 cyc) + // If r17 is a NaT, p6 will be zero + cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)? + ;; + mov r21=ar.fpsr // M2 (12 cyc) + tnat.nz p10,p9=r15 // I0 + mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...) + ;; + srlz.d // M0 (forces split-issue) ensure PSR.BE==0 +(p6) ld8 r18=[r18] // M0|1 + nop.i 0 + ;; + nop.m 0 +(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!) + nop.i 0 + ;; + SSM_PSR_I(p8, p14, r25) +(p6) mov b7=r18 // I0 +(p8) br.dptk.many b7 // B + + mov r27=ar.rsc // M2 (12 cyc) +/* + * brl.cond doesn't work as intended because the linker would convert this branch + * into a branch to a PLT. Perhaps there will be a way to avoid this with some + * future version of the linker. In the meantime, we just use an indirect branch + * instead. + */ +#ifdef CONFIG_ITANIUM +(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry + ;; +(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down + ;; +(p6) mov b7=r14 +(p6) br.sptk.many b7 +#else + BRL_COND_FSYS_BUBBLE_DOWN(p6) +#endif + SSM_PSR_I(p0, p14, r10) + mov r10=-1 +(p10) mov r8=EINVAL +(p9) mov r8=ENOSYS + FSYS_RETURN + +#ifdef CONFIG_PARAVIRT + /* + * padd to make the size of this symbol constant + * independent of paravirtualization. + */ + .align PAGE_SIZE / 8 +#endif +END(__kernel_syscall_via_epc) diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S index 3cb1abc00e24..88c64ed47c36 100644 --- a/arch/ia64/kernel/gate.lds.S +++ b/arch/ia64/kernel/gate.lds.S @@ -7,6 +7,7 @@ #include <asm/system.h> +#include "paravirt_patchlist.h" SECTIONS { @@ -33,21 +34,21 @@ SECTIONS . = GATE_ADDR + 0x600; .data.patch : { - __start_gate_mckinley_e9_patchlist = .; + __paravirt_start_gate_mckinley_e9_patchlist = .; *(.data.patch.mckinley_e9) - __end_gate_mckinley_e9_patchlist = .; + __paravirt_end_gate_mckinley_e9_patchlist = .; - __start_gate_vtop_patchlist = .; + __paravirt_start_gate_vtop_patchlist = .; *(.data.patch.vtop) - __end_gate_vtop_patchlist = .; + __paravirt_end_gate_vtop_patchlist = .; - __start_gate_fsyscall_patchlist = .; + __paravirt_start_gate_fsyscall_patchlist = .; *(.data.patch.fsyscall_table) - __end_gate_fsyscall_patchlist = .; + __paravirt_end_gate_fsyscall_patchlist = .; - __start_gate_brl_fsys_bubble_down_patchlist = .; + __paravirt_start_gate_brl_fsys_bubble_down_patchlist = .; *(.data.patch.brl_fsys_bubble_down) - __end_gate_brl_fsys_bubble_down_patchlist = .; + __paravirt_end_gate_brl_fsys_bubble_down_patchlist = .; } :readable .IA_64.unwind_info : { *(.IA_64.unwind_info*) } diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index 59301c472800..23f846de62d5 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -1050,7 +1050,7 @@ END(ia64_delay_loop) * except that the multiplication and the shift are done with 128-bit * intermediate precision so that we can produce a full 64-bit result. */ -GLOBAL_ENTRY(sched_clock) +GLOBAL_ENTRY(ia64_native_sched_clock) addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 mov.m r9=ar.itc // fetch cycle-counter (35 cyc) ;; @@ -1066,7 +1066,13 @@ GLOBAL_ENTRY(sched_clock) ;; shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT br.ret.sptk.many rp -END(sched_clock) +END(ia64_native_sched_clock) +#ifndef CONFIG_PARAVIRT + //unsigned long long + //sched_clock(void) __attribute__((alias("ia64_native_sched_clock"))); + .global sched_clock +sched_clock = ia64_native_sched_clock +#endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING GLOBAL_ENTRY(cycle_to_cputime) diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index f675d8e33853..ec9a5fdfa1b9 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -804,7 +804,7 @@ ENTRY(break_fault) /////////////////////////////////////////////////////////////////////// st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag #ifdef CONFIG_VIRT_CPU_ACCOUNTING - mov.m r30=ar.itc // M get cycle for accounting + MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting #else mov b6=r30 // I0 setup syscall handler branch reg early #endif diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index bab1de2d2f6a..8f33a8840422 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1456,9 +1456,9 @@ ia64_mca_cmc_int_caller(int cmc_irq, void *arg) ia64_mca_cmc_int_handler(cmc_irq, arg); - for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++); + cpuid = cpumask_next(cpuid+1, cpu_online_mask); - if (cpuid < NR_CPUS) { + if (cpuid < nr_cpu_ids) { platform_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0); } else { /* If no log record, switch out of polling mode */ @@ -1525,7 +1525,7 @@ ia64_mca_cpe_int_caller(int cpe_irq, void *arg) ia64_mca_cpe_int_handler(cpe_irq, arg); - for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++); + cpuid = cpumask_next(cpuid+1, cpu_online_mask); if (cpuid < NR_CPUS) { platform_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0); diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index aaa7d901521f..da3b0cf495a3 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -446,6 +446,14 @@ module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, mod->arch.opd = s; else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0) mod->arch.unwind = s; +#ifdef CONFIG_PARAVIRT + else if (strcmp(".paravirt_bundles", + secstrings + s->sh_name) == 0) + mod->arch.paravirt_bundles = s; + else if (strcmp(".paravirt_insts", + secstrings + s->sh_name) == 0) + mod->arch.paravirt_insts = s; +#endif if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) { printk(KERN_ERR "%s: sections missing\n", mod->name); @@ -525,8 +533,7 @@ get_ltoff (struct module *mod, uint64_t value, int *okp) goto found; /* Not enough GOT entries? */ - if (e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size)) - BUG(); + BUG_ON(e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size)); e->val = value; ++mod->arch.next_got_entry; @@ -921,6 +928,30 @@ module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mo DEBUGP("%s: init: entry=%p\n", __func__, mod->init); if (mod->arch.unwind) register_unwind_table(mod); +#ifdef CONFIG_PARAVIRT + if (mod->arch.paravirt_bundles) { + struct paravirt_patch_site_bundle *start = + (struct paravirt_patch_site_bundle *) + mod->arch.paravirt_bundles->sh_addr; + struct paravirt_patch_site_bundle *end = + (struct paravirt_patch_site_bundle *) + (mod->arch.paravirt_bundles->sh_addr + + mod->arch.paravirt_bundles->sh_size); + + paravirt_patch_apply_bundle(start, end); + } + if (mod->arch.paravirt_insts) { + struct paravirt_patch_site_inst *start = + (struct paravirt_patch_site_inst *) + mod->arch.paravirt_insts->sh_addr; + struct paravirt_patch_site_inst *end = + (struct paravirt_patch_site_inst *) + (mod->arch.paravirt_insts->sh_addr + + mod->arch.paravirt_insts->sh_size); + + paravirt_patch_apply_inst(start, end); + } +#endif return 0; } diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index 9f14c16f6369..a21d7bb9c69c 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -46,13 +46,23 @@ struct pv_info pv_info = { * initialization hooks. */ -struct pv_init_ops pv_init_ops; +static void __init +ia64_native_patch_branch(unsigned long tag, unsigned long type); + +struct pv_init_ops pv_init_ops = +{ +#ifdef ASM_SUPPORTED + .patch_bundle = ia64_native_patch_bundle, +#endif + .patch_branch = ia64_native_patch_branch, +}; /*************************************************************************** * pv_cpu_ops * intrinsics hooks. */ +#ifndef ASM_SUPPORTED /* ia64_native_xxx are macros so that we have to make them real functions */ #define DEFINE_VOID_FUNC1(name) \ @@ -60,7 +70,14 @@ struct pv_init_ops pv_init_ops; ia64_native_ ## name ## _func(unsigned long arg) \ { \ ia64_native_ ## name(arg); \ - } \ + } + +#define DEFINE_VOID_FUNC1_VOID(name) \ + static void \ + ia64_native_ ## name ## _func(void *arg) \ + { \ + ia64_native_ ## name(arg); \ + } #define DEFINE_VOID_FUNC2(name) \ static void \ @@ -68,7 +85,7 @@ struct pv_init_ops pv_init_ops; unsigned long arg1) \ { \ ia64_native_ ## name(arg0, arg1); \ - } \ + } #define DEFINE_FUNC0(name) \ static unsigned long \ @@ -84,7 +101,7 @@ struct pv_init_ops pv_init_ops; return ia64_native_ ## name(arg); \ } \ -DEFINE_VOID_FUNC1(fc); +DEFINE_VOID_FUNC1_VOID(fc); DEFINE_VOID_FUNC1(intrin_local_irq_restore); DEFINE_VOID_FUNC2(ptcga); @@ -274,6 +291,266 @@ ia64_native_setreg_func(int regnum, unsigned long val) break; } } +#else + +#define __DEFINE_FUNC(name, code) \ + extern const char ia64_native_ ## name ## _direct_start[]; \ + extern const char ia64_native_ ## name ## _direct_end[]; \ + asm (".align 32\n" \ + ".proc ia64_native_" #name "_func\n" \ + "ia64_native_" #name "_func:\n" \ + "ia64_native_" #name "_direct_start:\n" \ + code \ + "ia64_native_" #name "_direct_end:\n" \ + "br.cond.sptk.many b6\n" \ + ".endp ia64_native_" #name "_func\n") + +#define DEFINE_VOID_FUNC0(name, code) \ + extern void \ + ia64_native_ ## name ## _func(void); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_VOID_FUNC1(name, code) \ + extern void \ + ia64_native_ ## name ## _func(unsigned long arg); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_VOID_FUNC1_VOID(name, code) \ + extern void \ + ia64_native_ ## name ## _func(void *arg); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_VOID_FUNC2(name, code) \ + extern void \ + ia64_native_ ## name ## _func(unsigned long arg0, \ + unsigned long arg1); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_FUNC0(name, code) \ + extern unsigned long \ + ia64_native_ ## name ## _func(void); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_FUNC1(name, type, code) \ + extern unsigned long \ + ia64_native_ ## name ## _func(type arg); \ + __DEFINE_FUNC(name, code) + +DEFINE_VOID_FUNC1_VOID(fc, + "fc r8\n"); +DEFINE_VOID_FUNC1(intrin_local_irq_restore, + ";;\n" + " cmp.ne p6, p7 = r8, r0\n" + ";;\n" + "(p6) ssm psr.i\n" + "(p7) rsm psr.i\n" + ";;\n" + "(p6) srlz.d\n"); + +DEFINE_VOID_FUNC2(ptcga, + "ptc.ga r8, r9\n"); +DEFINE_VOID_FUNC2(set_rr, + "mov rr[r8] = r9\n"); + +/* ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I */ +DEFINE_FUNC0(get_psr_i, + "mov r2 = " __stringify(1 << IA64_PSR_I_BIT) "\n" + "mov r8 = psr\n" + ";;\n" + "and r8 = r2, r8\n"); + +DEFINE_FUNC1(thash, unsigned long, + "thash r8 = r8\n"); +DEFINE_FUNC1(get_cpuid, int, + "mov r8 = cpuid[r8]\n"); +DEFINE_FUNC1(get_pmd, int, + "mov r8 = pmd[r8]\n"); +DEFINE_FUNC1(get_rr, unsigned long, + "mov r8 = rr[r8]\n"); + +DEFINE_VOID_FUNC0(ssm_i, + "ssm psr.i\n"); +DEFINE_VOID_FUNC0(rsm_i, + "rsm psr.i\n"); + +extern void +ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1, + unsigned long val2, unsigned long val3, + unsigned long val4); +__DEFINE_FUNC(set_rr0_to_rr4, + "mov rr[r0] = r8\n" + "movl r2 = 0x2000000000000000\n" + ";;\n" + "mov rr[r2] = r9\n" + "shl r3 = r2, 1\n" /* movl r3 = 0x4000000000000000 */ + ";;\n" + "add r2 = r2, r3\n" /* movl r2 = 0x6000000000000000 */ + "mov rr[r3] = r10\n" + ";;\n" + "mov rr[r2] = r11\n" + "shl r3 = r3, 1\n" /* movl r3 = 0x8000000000000000 */ + ";;\n" + "mov rr[r3] = r14\n"); + +extern unsigned long ia64_native_getreg_func(int regnum); +asm(".global ia64_native_getreg_func\n"); +#define __DEFINE_GET_REG(id, reg) \ + "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \ + ";;\n" \ + "cmp.eq p6, p0 = r2, r8\n" \ + ";;\n" \ + "(p6) mov r8 = " #reg "\n" \ + "(p6) br.cond.sptk.many b6\n" \ + ";;\n" +#define __DEFINE_GET_AR(id, reg) __DEFINE_GET_REG(AR_ ## id, ar.reg) +#define __DEFINE_GET_CR(id, reg) __DEFINE_GET_REG(CR_ ## id, cr.reg) + +__DEFINE_FUNC(getreg, + __DEFINE_GET_REG(GP, gp) + /*__DEFINE_GET_REG(IP, ip)*/ /* returned ip value shouldn't be constant */ + __DEFINE_GET_REG(PSR, psr) + __DEFINE_GET_REG(TP, tp) + __DEFINE_GET_REG(SP, sp) + + __DEFINE_GET_REG(AR_KR0, ar0) + __DEFINE_GET_REG(AR_KR1, ar1) + __DEFINE_GET_REG(AR_KR2, ar2) + __DEFINE_GET_REG(AR_KR3, ar3) + __DEFINE_GET_REG(AR_KR4, ar4) + __DEFINE_GET_REG(AR_KR5, ar5) + __DEFINE_GET_REG(AR_KR6, ar6) + __DEFINE_GET_REG(AR_KR7, ar7) + __DEFINE_GET_AR(RSC, rsc) + __DEFINE_GET_AR(BSP, bsp) + __DEFINE_GET_AR(BSPSTORE, bspstore) + __DEFINE_GET_AR(RNAT, rnat) + __DEFINE_GET_AR(FCR, fcr) + __DEFINE_GET_AR(EFLAG, eflag) + __DEFINE_GET_AR(CSD, csd) + __DEFINE_GET_AR(SSD, ssd) + __DEFINE_GET_REG(AR_CFLAG, ar27) + __DEFINE_GET_AR(FSR, fsr) + __DEFINE_GET_AR(FIR, fir) + __DEFINE_GET_AR(FDR, fdr) + __DEFINE_GET_AR(CCV, ccv) + __DEFINE_GET_AR(UNAT, unat) + __DEFINE_GET_AR(FPSR, fpsr) + __DEFINE_GET_AR(ITC, itc) + __DEFINE_GET_AR(PFS, pfs) + __DEFINE_GET_AR(LC, lc) + __DEFINE_GET_AR(EC, ec) + + __DEFINE_GET_CR(DCR, dcr) + __DEFINE_GET_CR(ITM, itm) + __DEFINE_GET_CR(IVA, iva) + __DEFINE_GET_CR(PTA, pta) + __DEFINE_GET_CR(IPSR, ipsr) + __DEFINE_GET_CR(ISR, isr) + __DEFINE_GET_CR(IIP, iip) + __DEFINE_GET_CR(IFA, ifa) + __DEFINE_GET_CR(ITIR, itir) + __DEFINE_GET_CR(IIPA, iipa) + __DEFINE_GET_CR(IFS, ifs) + __DEFINE_GET_CR(IIM, iim) + __DEFINE_GET_CR(IHA, iha) + __DEFINE_GET_CR(LID, lid) + __DEFINE_GET_CR(IVR, ivr) + __DEFINE_GET_CR(TPR, tpr) + __DEFINE_GET_CR(EOI, eoi) + __DEFINE_GET_CR(IRR0, irr0) + __DEFINE_GET_CR(IRR1, irr1) + __DEFINE_GET_CR(IRR2, irr2) + __DEFINE_GET_CR(IRR3, irr3) + __DEFINE_GET_CR(ITV, itv) + __DEFINE_GET_CR(PMV, pmv) + __DEFINE_GET_CR(CMCV, cmcv) + __DEFINE_GET_CR(LRR0, lrr0) + __DEFINE_GET_CR(LRR1, lrr1) + + "mov r8 = -1\n" /* unsupported case */ + ); + +extern void ia64_native_setreg_func(int regnum, unsigned long val); +asm(".global ia64_native_setreg_func\n"); +#define __DEFINE_SET_REG(id, reg) \ + "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \ + ";;\n" \ + "cmp.eq p6, p0 = r2, r9\n" \ + ";;\n" \ + "(p6) mov " #reg " = r8\n" \ + "(p6) br.cond.sptk.many b6\n" \ + ";;\n" +#define __DEFINE_SET_AR(id, reg) __DEFINE_SET_REG(AR_ ## id, ar.reg) +#define __DEFINE_SET_CR(id, reg) __DEFINE_SET_REG(CR_ ## id, cr.reg) +__DEFINE_FUNC(setreg, + "mov r2 = " __stringify(_IA64_REG_PSR_L) "\n" + ";;\n" + "cmp.eq p6, p0 = r2, r9\n" + ";;\n" + "(p6) mov psr.l = r8\n" +#ifdef HAVE_SERIALIZE_DIRECTIVE + ".serialize.data\n" +#endif + "(p6) br.cond.sptk.many b6\n" + __DEFINE_SET_REG(GP, gp) + __DEFINE_SET_REG(SP, sp) + + __DEFINE_SET_REG(AR_KR0, ar0) + __DEFINE_SET_REG(AR_KR1, ar1) + __DEFINE_SET_REG(AR_KR2, ar2) + __DEFINE_SET_REG(AR_KR3, ar3) + __DEFINE_SET_REG(AR_KR4, ar4) + __DEFINE_SET_REG(AR_KR5, ar5) + __DEFINE_SET_REG(AR_KR6, ar6) + __DEFINE_SET_REG(AR_KR7, ar7) + __DEFINE_SET_AR(RSC, rsc) + __DEFINE_SET_AR(BSP, bsp) + __DEFINE_SET_AR(BSPSTORE, bspstore) + __DEFINE_SET_AR(RNAT, rnat) + __DEFINE_SET_AR(FCR, fcr) + __DEFINE_SET_AR(EFLAG, eflag) + __DEFINE_SET_AR(CSD, csd) + __DEFINE_SET_AR(SSD, ssd) + __DEFINE_SET_REG(AR_CFLAG, ar27) + __DEFINE_SET_AR(FSR, fsr) + __DEFINE_SET_AR(FIR, fir) + __DEFINE_SET_AR(FDR, fdr) + __DEFINE_SET_AR(CCV, ccv) + __DEFINE_SET_AR(UNAT, unat) + __DEFINE_SET_AR(FPSR, fpsr) + __DEFINE_SET_AR(ITC, itc) + __DEFINE_SET_AR(PFS, pfs) + __DEFINE_SET_AR(LC, lc) + __DEFINE_SET_AR(EC, ec) + + __DEFINE_SET_CR(DCR, dcr) + __DEFINE_SET_CR(ITM, itm) + __DEFINE_SET_CR(IVA, iva) + __DEFINE_SET_CR(PTA, pta) + __DEFINE_SET_CR(IPSR, ipsr) + __DEFINE_SET_CR(ISR, isr) + __DEFINE_SET_CR(IIP, iip) + __DEFINE_SET_CR(IFA, ifa) + __DEFINE_SET_CR(ITIR, itir) + __DEFINE_SET_CR(IIPA, iipa) + __DEFINE_SET_CR(IFS, ifs) + __DEFINE_SET_CR(IIM, iim) + __DEFINE_SET_CR(IHA, iha) + __DEFINE_SET_CR(LID, lid) + __DEFINE_SET_CR(IVR, ivr) + __DEFINE_SET_CR(TPR, tpr) + __DEFINE_SET_CR(EOI, eoi) + __DEFINE_SET_CR(IRR0, irr0) + __DEFINE_SET_CR(IRR1, irr1) + __DEFINE_SET_CR(IRR2, irr2) + __DEFINE_SET_CR(IRR3, irr3) + __DEFINE_SET_CR(ITV, itv) + __DEFINE_SET_CR(PMV, pmv) + __DEFINE_SET_CR(CMCV, cmcv) + __DEFINE_SET_CR(LRR0, lrr0) + __DEFINE_SET_CR(LRR1, lrr1) + ); +#endif struct pv_cpu_ops pv_cpu_ops = { .fc = ia64_native_fc_func, @@ -366,4 +643,258 @@ ia64_native_do_steal_accounting(unsigned long *new_itm) struct pv_time_ops pv_time_ops = { .do_steal_accounting = ia64_native_do_steal_accounting, + .sched_clock = ia64_native_sched_clock, +}; + +/*************************************************************************** + * binary pacthing + * pv_init_ops.patch_bundle + */ + +#ifdef ASM_SUPPORTED +#define IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg) \ + __DEFINE_FUNC(get_ ## name, \ + ";;\n" \ + "mov r8 = " #reg "\n" \ + ";;\n") + +#define IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg) \ + __DEFINE_FUNC(set_ ## name, \ + ";;\n" \ + "mov " #reg " = r8\n" \ + ";;\n") + +#define IA64_NATIVE_PATCH_DEFINE_REG(name, reg) \ + IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg); \ + IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg) \ + +#define IA64_NATIVE_PATCH_DEFINE_AR(name, reg) \ + IA64_NATIVE_PATCH_DEFINE_REG(ar_ ## name, ar.reg) + +#define IA64_NATIVE_PATCH_DEFINE_CR(name, reg) \ + IA64_NATIVE_PATCH_DEFINE_REG(cr_ ## name, cr.reg) + + +IA64_NATIVE_PATCH_DEFINE_GET_REG(psr, psr); +IA64_NATIVE_PATCH_DEFINE_GET_REG(tp, tp); + +/* IA64_NATIVE_PATCH_DEFINE_SET_REG(psr_l, psr.l); */ +__DEFINE_FUNC(set_psr_l, + ";;\n" + "mov psr.l = r8\n" +#ifdef HAVE_SERIALIZE_DIRECTIVE + ".serialize.data\n" +#endif + ";;\n"); + +IA64_NATIVE_PATCH_DEFINE_REG(gp, gp); +IA64_NATIVE_PATCH_DEFINE_REG(sp, sp); + +IA64_NATIVE_PATCH_DEFINE_REG(kr0, ar0); +IA64_NATIVE_PATCH_DEFINE_REG(kr1, ar1); +IA64_NATIVE_PATCH_DEFINE_REG(kr2, ar2); +IA64_NATIVE_PATCH_DEFINE_REG(kr3, ar3); +IA64_NATIVE_PATCH_DEFINE_REG(kr4, ar4); +IA64_NATIVE_PATCH_DEFINE_REG(kr5, ar5); +IA64_NATIVE_PATCH_DEFINE_REG(kr6, ar6); +IA64_NATIVE_PATCH_DEFINE_REG(kr7, ar7); + +IA64_NATIVE_PATCH_DEFINE_AR(rsc, rsc); +IA64_NATIVE_PATCH_DEFINE_AR(bsp, bsp); +IA64_NATIVE_PATCH_DEFINE_AR(bspstore, bspstore); +IA64_NATIVE_PATCH_DEFINE_AR(rnat, rnat); +IA64_NATIVE_PATCH_DEFINE_AR(fcr, fcr); +IA64_NATIVE_PATCH_DEFINE_AR(eflag, eflag); +IA64_NATIVE_PATCH_DEFINE_AR(csd, csd); +IA64_NATIVE_PATCH_DEFINE_AR(ssd, ssd); +IA64_NATIVE_PATCH_DEFINE_REG(ar27, ar27); +IA64_NATIVE_PATCH_DEFINE_AR(fsr, fsr); +IA64_NATIVE_PATCH_DEFINE_AR(fir, fir); +IA64_NATIVE_PATCH_DEFINE_AR(fdr, fdr); +IA64_NATIVE_PATCH_DEFINE_AR(ccv, ccv); +IA64_NATIVE_PATCH_DEFINE_AR(unat, unat); +IA64_NATIVE_PATCH_DEFINE_AR(fpsr, fpsr); +IA64_NATIVE_PATCH_DEFINE_AR(itc, itc); +IA64_NATIVE_PATCH_DEFINE_AR(pfs, pfs); +IA64_NATIVE_PATCH_DEFINE_AR(lc, lc); +IA64_NATIVE_PATCH_DEFINE_AR(ec, ec); + +IA64_NATIVE_PATCH_DEFINE_CR(dcr, dcr); +IA64_NATIVE_PATCH_DEFINE_CR(itm, itm); +IA64_NATIVE_PATCH_DEFINE_CR(iva, iva); +IA64_NATIVE_PATCH_DEFINE_CR(pta, pta); +IA64_NATIVE_PATCH_DEFINE_CR(ipsr, ipsr); +IA64_NATIVE_PATCH_DEFINE_CR(isr, isr); +IA64_NATIVE_PATCH_DEFINE_CR(iip, iip); +IA64_NATIVE_PATCH_DEFINE_CR(ifa, ifa); +IA64_NATIVE_PATCH_DEFINE_CR(itir, itir); +IA64_NATIVE_PATCH_DEFINE_CR(iipa, iipa); +IA64_NATIVE_PATCH_DEFINE_CR(ifs, ifs); +IA64_NATIVE_PATCH_DEFINE_CR(iim, iim); +IA64_NATIVE_PATCH_DEFINE_CR(iha, iha); +IA64_NATIVE_PATCH_DEFINE_CR(lid, lid); +IA64_NATIVE_PATCH_DEFINE_CR(ivr, ivr); +IA64_NATIVE_PATCH_DEFINE_CR(tpr, tpr); +IA64_NATIVE_PATCH_DEFINE_CR(eoi, eoi); +IA64_NATIVE_PATCH_DEFINE_CR(irr0, irr0); +IA64_NATIVE_PATCH_DEFINE_CR(irr1, irr1); +IA64_NATIVE_PATCH_DEFINE_CR(irr2, irr2); +IA64_NATIVE_PATCH_DEFINE_CR(irr3, irr3); +IA64_NATIVE_PATCH_DEFINE_CR(itv, itv); +IA64_NATIVE_PATCH_DEFINE_CR(pmv, pmv); +IA64_NATIVE_PATCH_DEFINE_CR(cmcv, cmcv); +IA64_NATIVE_PATCH_DEFINE_CR(lrr0, lrr0); +IA64_NATIVE_PATCH_DEFINE_CR(lrr1, lrr1); + +static const struct paravirt_patch_bundle_elem ia64_native_patch_bundle_elems[] +__initdata_or_module = +{ +#define IA64_NATIVE_PATCH_BUNDLE_ELEM(name, type) \ + { \ + (void*)ia64_native_ ## name ## _direct_start, \ + (void*)ia64_native_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_ ## type, \ + } + + IA64_NATIVE_PATCH_BUNDLE_ELEM(fc, FC), + IA64_NATIVE_PATCH_BUNDLE_ELEM(thash, THASH), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD), + IA64_NATIVE_PATCH_BUNDLE_ELEM(ptcga, PTCGA), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_rr, GET_RR), + IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr, SET_RR), + IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4), + IA64_NATIVE_PATCH_BUNDLE_ELEM(ssm_i, SSM_I), + IA64_NATIVE_PATCH_BUNDLE_ELEM(rsm_i, RSM_I), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I), + IA64_NATIVE_PATCH_BUNDLE_ELEM(intrin_local_irq_restore, + INTRIN_LOCAL_IRQ_RESTORE), + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg) \ + { \ + (void*)ia64_native_get_ ## name ## _direct_start, \ + (void*)ia64_native_get_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg, \ + } + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg) \ + { \ + (void*)ia64_native_set_ ## name ## _direct_start, \ + (void*)ia64_native_set_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg, \ + } + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(name, reg) \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg), \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg) \ + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(name, reg) \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar_ ## name, AR_ ## reg) + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(name, reg) \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(cr_ ## name, CR_ ## reg) + + IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(psr, PSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(tp, TP), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(psr_l, PSR_L), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(gp, GP), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(sp, SP), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr0, AR_KR0), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr1, AR_KR1), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr2, AR_KR2), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr3, AR_KR3), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr4, AR_KR4), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr5, AR_KR5), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr6, AR_KR6), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr7, AR_KR7), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rsc, RSC), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bsp, BSP), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bspstore, BSPSTORE), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rnat, RNAT), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fcr, FCR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(eflag, EFLAG), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(csd, CSD), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ssd, SSD), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar27, AR_CFLAG), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fsr, FSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fir, FIR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fdr, FDR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ccv, CCV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(unat, UNAT), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fpsr, FPSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(itc, ITC), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(pfs, PFS), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(lc, LC), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ec, EC), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(dcr, DCR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itm, ITM), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iva, IVA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pta, PTA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ipsr, IPSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(isr, ISR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iip, IIP), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifa, IFA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itir, ITIR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iipa, IIPA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifs, IFS), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iim, IIM), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iha, IHA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lid, LID), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ivr, IVR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(tpr, TPR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(eoi, EOI), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr0, IRR0), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr1, IRR1), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr2, IRR2), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr3, IRR3), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itv, ITV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pmv, PMV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(cmcv, CMCV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr0, LRR0), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr1, LRR1), }; + +unsigned long __init_or_module +ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type) +{ + const unsigned long nelems = sizeof(ia64_native_patch_bundle_elems) / + sizeof(ia64_native_patch_bundle_elems[0]); + + return __paravirt_patch_apply_bundle(sbundle, ebundle, type, + ia64_native_patch_bundle_elems, + nelems, NULL); +} +#endif /* ASM_SUPPOTED */ + +extern const char ia64_native_switch_to[]; +extern const char ia64_native_leave_syscall[]; +extern const char ia64_native_work_processed_syscall[]; +extern const char ia64_native_leave_kernel[]; + +const struct paravirt_patch_branch_target ia64_native_branch_target[] +__initconst = { +#define PARAVIRT_BR_TARGET(name, type) \ + { \ + ia64_native_ ## name, \ + PARAVIRT_PATCH_TYPE_BR_ ## type, \ + } + PARAVIRT_BR_TARGET(switch_to, SWITCH_TO), + PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL), + PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL), + PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL), +}; + +static void __init +ia64_native_patch_branch(unsigned long tag, unsigned long type) +{ + const unsigned long nelem = + sizeof(ia64_native_branch_target) / + sizeof(ia64_native_branch_target[0]); + __paravirt_patch_apply_branch(tag, type, + ia64_native_branch_target, nelem); +} diff --git a/arch/ia64/kernel/paravirt_patch.c b/arch/ia64/kernel/paravirt_patch.c new file mode 100644 index 000000000000..bfdfef1b1ffd --- /dev/null +++ b/arch/ia64/kernel/paravirt_patch.c @@ -0,0 +1,514 @@ +/****************************************************************************** + * linux/arch/ia64/xen/paravirt_patch.c + * + * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/init.h> +#include <asm/intrinsics.h> +#include <asm/kprobes.h> +#include <asm/paravirt.h> +#include <asm/paravirt_patch.h> + +typedef union ia64_inst { + struct { + unsigned long long qp : 6; + unsigned long long : 31; + unsigned long long opcode : 4; + unsigned long long reserved : 23; + } generic; + unsigned long long l; +} ia64_inst_t; + +/* + * flush_icache_range() can't be used here. + * we are here before cpu_init() which initializes + * ia64_i_cache_stride_shift. flush_icache_range() uses it. + */ +void __init_or_module +paravirt_flush_i_cache_range(const void *instr, unsigned long size) +{ + extern void paravirt_fc_i(const void *addr); + unsigned long i; + + for (i = 0; i < size; i += sizeof(bundle_t)) + paravirt_fc_i(instr + i); +} + +bundle_t* __init_or_module +paravirt_get_bundle(unsigned long tag) +{ + return (bundle_t *)(tag & ~3UL); +} + +unsigned long __init_or_module +paravirt_get_slot(unsigned long tag) +{ + return tag & 3UL; +} + +unsigned long __init_or_module +paravirt_get_num_inst(unsigned long stag, unsigned long etag) +{ + bundle_t *sbundle = paravirt_get_bundle(stag); + unsigned long sslot = paravirt_get_slot(stag); + bundle_t *ebundle = paravirt_get_bundle(etag); + unsigned long eslot = paravirt_get_slot(etag); + + return (ebundle - sbundle) * 3 + eslot - sslot + 1; +} + +unsigned long __init_or_module +paravirt_get_next_tag(unsigned long tag) +{ + unsigned long slot = paravirt_get_slot(tag); + + switch (slot) { + case 0: + case 1: + return tag + 1; + case 2: { + bundle_t *bundle = paravirt_get_bundle(tag); + return (unsigned long)(bundle + 1); + } + default: + BUG(); + } + /* NOTREACHED */ +} + +ia64_inst_t __init_or_module +paravirt_read_slot0(const bundle_t *bundle) +{ + ia64_inst_t inst; + inst.l = bundle->quad0.slot0; + return inst; +} + +ia64_inst_t __init_or_module +paravirt_read_slot1(const bundle_t *bundle) +{ + ia64_inst_t inst; + inst.l = bundle->quad0.slot1_p0 | + ((unsigned long long)bundle->quad1.slot1_p1 << 18UL); + return inst; +} + +ia64_inst_t __init_or_module +paravirt_read_slot2(const bundle_t *bundle) +{ + ia64_inst_t inst; + inst.l = bundle->quad1.slot2; + return inst; +} + +ia64_inst_t __init_or_module +paravirt_read_inst(unsigned long tag) +{ + bundle_t *bundle = paravirt_get_bundle(tag); + unsigned long slot = paravirt_get_slot(tag); + + switch (slot) { + case 0: + return paravirt_read_slot0(bundle); + case 1: + return paravirt_read_slot1(bundle); + case 2: + return paravirt_read_slot2(bundle); + default: + BUG(); + } + /* NOTREACHED */ +} + +void __init_or_module +paravirt_write_slot0(bundle_t *bundle, ia64_inst_t inst) +{ + bundle->quad0.slot0 = inst.l; +} + +void __init_or_module +paravirt_write_slot1(bundle_t *bundle, ia64_inst_t inst) +{ + bundle->quad0.slot1_p0 = inst.l; + bundle->quad1.slot1_p1 = inst.l >> 18UL; +} + +void __init_or_module +paravirt_write_slot2(bundle_t *bundle, ia64_inst_t inst) +{ + bundle->quad1.slot2 = inst.l; +} + +void __init_or_module +paravirt_write_inst(unsigned long tag, ia64_inst_t inst) +{ + bundle_t *bundle = paravirt_get_bundle(tag); + unsigned long slot = paravirt_get_slot(tag); + + switch (slot) { + case 0: + paravirt_write_slot0(bundle, inst); + break; + case 1: + paravirt_write_slot1(bundle, inst); + break; + case 2: + paravirt_write_slot2(bundle, inst); + break; + default: + BUG(); + break; + } + paravirt_flush_i_cache_range(bundle, sizeof(*bundle)); +} + +/* for debug */ +void +paravirt_print_bundle(const bundle_t *bundle) +{ + const unsigned long *quad = (const unsigned long *)bundle; + ia64_inst_t slot0 = paravirt_read_slot0(bundle); + ia64_inst_t slot1 = paravirt_read_slot1(bundle); + ia64_inst_t slot2 = paravirt_read_slot2(bundle); + + printk(KERN_DEBUG + "bundle 0x%p 0x%016lx 0x%016lx\n", bundle, quad[0], quad[1]); + printk(KERN_DEBUG + "bundle template 0x%x\n", + bundle->quad0.template); + printk(KERN_DEBUG + "slot0 0x%lx slot1_p0 0x%lx slot1_p1 0x%lx slot2 0x%lx\n", + (unsigned long)bundle->quad0.slot0, + (unsigned long)bundle->quad0.slot1_p0, + (unsigned long)bundle->quad1.slot1_p1, + (unsigned long)bundle->quad1.slot2); + printk(KERN_DEBUG + "slot0 0x%016llx slot1 0x%016llx slot2 0x%016llx\n", + slot0.l, slot1.l, slot2.l); +} + +static int noreplace_paravirt __init_or_module = 0; + +static int __init setup_noreplace_paravirt(char *str) +{ + noreplace_paravirt = 1; + return 1; +} +__setup("noreplace-paravirt", setup_noreplace_paravirt); + +#ifdef ASM_SUPPORTED +static void __init_or_module +fill_nop_bundle(void *sbundle, void *ebundle) +{ + extern const char paravirt_nop_bundle[]; + extern const unsigned long paravirt_nop_bundle_size; + + void *bundle = sbundle; + + BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0); + BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0); + + while (bundle < ebundle) { + memcpy(bundle, paravirt_nop_bundle, paravirt_nop_bundle_size); + + bundle += paravirt_nop_bundle_size; + } +} + +/* helper function */ +unsigned long __init_or_module +__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type, + const struct paravirt_patch_bundle_elem *elems, + unsigned long nelems, + const struct paravirt_patch_bundle_elem **found) +{ + unsigned long used = 0; + unsigned long i; + + BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0); + BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0); + + found = NULL; + for (i = 0; i < nelems; i++) { + const struct paravirt_patch_bundle_elem *p = &elems[i]; + if (p->type == type) { + unsigned long need = p->ebundle - p->sbundle; + unsigned long room = ebundle - sbundle; + + if (found != NULL) + *found = p; + + if (room < need) { + /* no room to replace. skip it */ + printk(KERN_DEBUG + "the space is too small to put " + "bundles. type %ld need %ld room %ld\n", + type, need, room); + break; + } + + used = need; + memcpy(sbundle, p->sbundle, used); + break; + } + } + + return used; +} + +void __init_or_module +paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start, + const struct paravirt_patch_site_bundle *end) +{ + const struct paravirt_patch_site_bundle *p; + + if (noreplace_paravirt) + return; + if (pv_init_ops.patch_bundle == NULL) + return; + + for (p = start; p < end; p++) { + unsigned long used; + + used = (*pv_init_ops.patch_bundle)(p->sbundle, p->ebundle, + p->type); + if (used == 0) + continue; + + fill_nop_bundle(p->sbundle + used, p->ebundle); + paravirt_flush_i_cache_range(p->sbundle, + p->ebundle - p->sbundle); + } + ia64_sync_i(); + ia64_srlz_i(); +} + +/* + * nop.i, nop.m, nop.f instruction are same format. + * but nop.b has differennt format. + * This doesn't support nop.b for now. + */ +static void __init_or_module +fill_nop_inst(unsigned long stag, unsigned long etag) +{ + extern const bundle_t paravirt_nop_mfi_inst_bundle[]; + unsigned long tag; + const ia64_inst_t nop_inst = + paravirt_read_slot0(paravirt_nop_mfi_inst_bundle); + + for (tag = stag; tag < etag; tag = paravirt_get_next_tag(tag)) + paravirt_write_inst(tag, nop_inst); +} + +void __init_or_module +paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start, + const struct paravirt_patch_site_inst *end) +{ + const struct paravirt_patch_site_inst *p; + + if (noreplace_paravirt) + return; + if (pv_init_ops.patch_inst == NULL) + return; + + for (p = start; p < end; p++) { + unsigned long tag; + bundle_t *sbundle; + bundle_t *ebundle; + + tag = (*pv_init_ops.patch_inst)(p->stag, p->etag, p->type); + if (tag == p->stag) + continue; + + fill_nop_inst(tag, p->etag); + sbundle = paravirt_get_bundle(p->stag); + ebundle = paravirt_get_bundle(p->etag) + 1; + paravirt_flush_i_cache_range(sbundle, (ebundle - sbundle) * + sizeof(bundle_t)); + } + ia64_sync_i(); + ia64_srlz_i(); +} +#endif /* ASM_SUPPOTED */ + +/* brl.cond.sptk.many <target64> X3 */ +typedef union inst_x3_op { + ia64_inst_t inst; + struct { + unsigned long qp: 6; + unsigned long btyp: 3; + unsigned long unused: 3; + unsigned long p: 1; + unsigned long imm20b: 20; + unsigned long wh: 2; + unsigned long d: 1; + unsigned long i: 1; + unsigned long opcode: 4; + }; + unsigned long l; +} inst_x3_op_t; + +typedef union inst_x3_imm { + ia64_inst_t inst; + struct { + unsigned long unused: 2; + unsigned long imm39: 39; + }; + unsigned long l; +} inst_x3_imm_t; + +void __init_or_module +paravirt_patch_reloc_brl(unsigned long tag, const void *target) +{ + unsigned long tag_op = paravirt_get_next_tag(tag); + unsigned long tag_imm = tag; + bundle_t *bundle = paravirt_get_bundle(tag); + + ia64_inst_t inst_op = paravirt_read_inst(tag_op); + ia64_inst_t inst_imm = paravirt_read_inst(tag_imm); + + inst_x3_op_t inst_x3_op = { .l = inst_op.l }; + inst_x3_imm_t inst_x3_imm = { .l = inst_imm.l }; + + unsigned long imm60 = + ((unsigned long)target - (unsigned long)bundle) >> 4; + + BUG_ON(paravirt_get_slot(tag) != 1); /* MLX */ + BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0); + + /* imm60[59] 1bit */ + inst_x3_op.i = (imm60 >> 59) & 1; + /* imm60[19:0] 20bit */ + inst_x3_op.imm20b = imm60 & ((1UL << 20) - 1); + /* imm60[58:20] 39bit */ + inst_x3_imm.imm39 = (imm60 >> 20) & ((1UL << 39) - 1); + + inst_op.l = inst_x3_op.l; + inst_imm.l = inst_x3_imm.l; + + paravirt_write_inst(tag_op, inst_op); + paravirt_write_inst(tag_imm, inst_imm); +} + +/* br.cond.sptk.many <target25> B1 */ +typedef union inst_b1 { + ia64_inst_t inst; + struct { + unsigned long qp: 6; + unsigned long btype: 3; + unsigned long unused: 3; + unsigned long p: 1; + unsigned long imm20b: 20; + unsigned long wh: 2; + unsigned long d: 1; + unsigned long s: 1; + unsigned long opcode: 4; + }; + unsigned long l; +} inst_b1_t; + +void __init +paravirt_patch_reloc_br(unsigned long tag, const void *target) +{ + bundle_t *bundle = paravirt_get_bundle(tag); + ia64_inst_t inst = paravirt_read_inst(tag); + unsigned long target25 = (unsigned long)target - (unsigned long)bundle; + inst_b1_t inst_b1; + + BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0); + + inst_b1.l = inst.l; + if (target25 & (1UL << 63)) + inst_b1.s = 1; + else + inst_b1.s = 0; + + inst_b1.imm20b = target25 >> 4; + inst.l = inst_b1.l; + + paravirt_write_inst(tag, inst); +} + +void __init +__paravirt_patch_apply_branch( + unsigned long tag, unsigned long type, + const struct paravirt_patch_branch_target *entries, + unsigned int nr_entries) +{ + unsigned int i; + for (i = 0; i < nr_entries; i++) { + if (entries[i].type == type) { + paravirt_patch_reloc_br(tag, entries[i].entry); + break; + } + } +} + +static void __init +paravirt_patch_apply_branch(const struct paravirt_patch_site_branch *start, + const struct paravirt_patch_site_branch *end) +{ + const struct paravirt_patch_site_branch *p; + + if (noreplace_paravirt) + return; + if (pv_init_ops.patch_branch == NULL) + return; + + for (p = start; p < end; p++) + (*pv_init_ops.patch_branch)(p->tag, p->type); + + ia64_sync_i(); + ia64_srlz_i(); +} + +void __init +paravirt_patch_apply(void) +{ + extern const char __start_paravirt_bundles[]; + extern const char __stop_paravirt_bundles[]; + extern const char __start_paravirt_insts[]; + extern const char __stop_paravirt_insts[]; + extern const char __start_paravirt_branches[]; + extern const char __stop_paravirt_branches[]; + + paravirt_patch_apply_bundle((const struct paravirt_patch_site_bundle *) + __start_paravirt_bundles, + (const struct paravirt_patch_site_bundle *) + __stop_paravirt_bundles); + paravirt_patch_apply_inst((const struct paravirt_patch_site_inst *) + __start_paravirt_insts, + (const struct paravirt_patch_site_inst *) + __stop_paravirt_insts); + paravirt_patch_apply_branch((const struct paravirt_patch_site_branch *) + __start_paravirt_branches, + (const struct paravirt_patch_site_branch *) + __stop_paravirt_branches); +} + +/* + * Local variables: + * mode: C + * c-set-style: "linux" + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + */ diff --git a/arch/ia64/kernel/paravirt_patchlist.c b/arch/ia64/kernel/paravirt_patchlist.c new file mode 100644 index 000000000000..b28082a95d45 --- /dev/null +++ b/arch/ia64/kernel/paravirt_patchlist.c @@ -0,0 +1,79 @@ +/****************************************************************************** + * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/bug.h> +#include <asm/paravirt.h> + +#define DECLARE(name) \ + extern unsigned long \ + __ia64_native_start_gate_##name##_patchlist[]; \ + extern unsigned long \ + __ia64_native_end_gate_##name##_patchlist[] + +DECLARE(fsyscall); +DECLARE(brl_fsys_bubble_down); +DECLARE(vtop); +DECLARE(mckinley_e9); + +extern unsigned long __start_gate_section[]; + +#define ASSIGN(name) \ + .start_##name##_patchlist = \ + (unsigned long)__ia64_native_start_gate_##name##_patchlist, \ + .end_##name##_patchlist = \ + (unsigned long)__ia64_native_end_gate_##name##_patchlist + +struct pv_patchdata pv_patchdata __initdata = { + ASSIGN(fsyscall), + ASSIGN(brl_fsys_bubble_down), + ASSIGN(vtop), + ASSIGN(mckinley_e9), + + .gate_section = (void*)__start_gate_section, +}; + + +unsigned long __init +paravirt_get_gate_patchlist(enum pv_gate_patchlist type) +{ + +#define CASE(NAME, name) \ + case PV_GATE_START_##NAME: \ + return pv_patchdata.start_##name##_patchlist; \ + case PV_GATE_END_##NAME: \ + return pv_patchdata.end_##name##_patchlist; \ + + switch (type) { + CASE(FSYSCALL, fsyscall); + CASE(BRL_FSYS_BUBBLE_DOWN, brl_fsys_bubble_down); + CASE(VTOP, vtop); + CASE(MCKINLEY_E9, mckinley_e9); + default: + BUG(); + break; + } + return 0; +} + +void * __init +paravirt_get_gate_section(void) +{ + return pv_patchdata.gate_section; +} diff --git a/arch/ia64/kernel/paravirt_patchlist.h b/arch/ia64/kernel/paravirt_patchlist.h new file mode 100644 index 000000000000..0684aa6c6507 --- /dev/null +++ b/arch/ia64/kernel/paravirt_patchlist.h @@ -0,0 +1,28 @@ +/****************************************************************************** + * linux/arch/ia64/xen/paravirt_patchlist.h + * + * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#if defined(__IA64_GATE_PARAVIRTUALIZED_XEN) +#include <asm/xen/patchlist.h> +#else +#include <asm/native/patchlist.h> +#endif + diff --git a/arch/ia64/kernel/paravirtentry.S b/arch/ia64/kernel/paravirtentry.S index 2f42fcb9776a..6158560d7f17 100644 --- a/arch/ia64/kernel/paravirtentry.S +++ b/arch/ia64/kernel/paravirtentry.S @@ -20,8 +20,11 @@ * */ +#include <linux/init.h> #include <asm/asmmacro.h> #include <asm/asm-offsets.h> +#include <asm/paravirt_privop.h> +#include <asm/paravirt_patch.h> #include "entry.h" #define DATA8(sym, init_value) \ @@ -32,29 +35,87 @@ data8 init_value ; \ .popsection -#define BRANCH(targ, reg, breg) \ - movl reg=targ ; \ - ;; \ - ld8 reg=[reg] ; \ - ;; \ - mov breg=reg ; \ +#define BRANCH(targ, reg, breg, type) \ + PARAVIRT_PATCH_SITE_BR(PARAVIRT_PATCH_TYPE_BR_ ## type) ; \ + ;; \ + movl reg=targ ; \ + ;; \ + ld8 reg=[reg] ; \ + ;; \ + mov breg=reg ; \ br.cond.sptk.many breg -#define BRANCH_PROC(sym, reg, breg) \ - DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ - GLOBAL_ENTRY(paravirt_ ## sym) ; \ - BRANCH(paravirt_ ## sym ## _targ, reg, breg) ; \ +#define BRANCH_PROC(sym, reg, breg, type) \ + DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ + GLOBAL_ENTRY(paravirt_ ## sym) ; \ + BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ; \ END(paravirt_ ## sym) -#define BRANCH_PROC_UNWINFO(sym, reg, breg) \ - DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ - GLOBAL_ENTRY(paravirt_ ## sym) ; \ - PT_REGS_UNWIND_INFO(0) ; \ - BRANCH(paravirt_ ## sym ## _targ, reg, breg) ; \ +#define BRANCH_PROC_UNWINFO(sym, reg, breg, type) \ + DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ + GLOBAL_ENTRY(paravirt_ ## sym) ; \ + PT_REGS_UNWIND_INFO(0) ; \ + BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ; \ END(paravirt_ ## sym) -BRANCH_PROC(switch_to, r22, b7) -BRANCH_PROC_UNWINFO(leave_syscall, r22, b7) -BRANCH_PROC(work_processed_syscall, r2, b7) -BRANCH_PROC_UNWINFO(leave_kernel, r22, b7) +BRANCH_PROC(switch_to, r22, b7, SWITCH_TO) +BRANCH_PROC_UNWINFO(leave_syscall, r22, b7, LEAVE_SYSCALL) +BRANCH_PROC(work_processed_syscall, r2, b7, WORK_PROCESSED_SYSCALL) +BRANCH_PROC_UNWINFO(leave_kernel, r22, b7, LEAVE_KERNEL) + + +#ifdef CONFIG_MODULES +#define __INIT_OR_MODULE .text +#define __INITDATA_OR_MODULE .data +#else +#define __INIT_OR_MODULE __INIT +#define __INITDATA_OR_MODULE __INITDATA +#endif /* CONFIG_MODULES */ + + __INIT_OR_MODULE + GLOBAL_ENTRY(paravirt_fc_i) + fc.i r32 + br.ret.sptk.many rp + END(paravirt_fc_i) + __FINIT + + __INIT_OR_MODULE + .align 32 + GLOBAL_ENTRY(paravirt_nop_b_inst_bundle) + { + nop.b 0 + nop.b 0 + nop.b 0 + } + END(paravirt_nop_b_inst_bundle) + __FINIT + + /* NOTE: nop.[mfi] has same format */ + __INIT_OR_MODULE + GLOBAL_ENTRY(paravirt_nop_mfi_inst_bundle) + { + nop.m 0 + nop.f 0 + nop.i 0 + } + END(paravirt_nop_mfi_inst_bundle) + __FINIT + + __INIT_OR_MODULE + GLOBAL_ENTRY(paravirt_nop_bundle) +paravirt_nop_bundle_start: + { + nop 0 + nop 0 + nop 0 + } +paravirt_nop_bundle_end: + END(paravirt_nop_bundle) + __FINIT + + __INITDATA_OR_MODULE + .align 8 + .global paravirt_nop_bundle_size +paravirt_nop_bundle_size: + data8 paravirt_nop_bundle_end - paravirt_nop_bundle_start diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c index b83b2c516008..68a1311db806 100644 --- a/arch/ia64/kernel/patch.c +++ b/arch/ia64/kernel/patch.c @@ -7,6 +7,7 @@ #include <linux/init.h> #include <linux/string.h> +#include <asm/paravirt.h> #include <asm/patch.h> #include <asm/processor.h> #include <asm/sections.h> @@ -169,16 +170,35 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) ia64_srlz_i(); } +extern unsigned long ia64_native_fsyscall_table[NR_syscalls]; +extern char ia64_native_fsys_bubble_down[]; +struct pv_fsys_data pv_fsys_data __initdata = { + .fsyscall_table = (unsigned long *)ia64_native_fsyscall_table, + .fsys_bubble_down = (void *)ia64_native_fsys_bubble_down, +}; + +unsigned long * __init +paravirt_get_fsyscall_table(void) +{ + return pv_fsys_data.fsyscall_table; +} + +char * __init +paravirt_get_fsys_bubble_down(void) +{ + return pv_fsys_data.fsys_bubble_down; +} + static void __init patch_fsyscall_table (unsigned long start, unsigned long end) { - extern unsigned long fsyscall_table[NR_syscalls]; + u64 fsyscall_table = (u64)paravirt_get_fsyscall_table(); s32 *offp = (s32 *) start; u64 ip; while (offp < (s32 *) end) { ip = (u64) ia64_imva((char *) offp + *offp); - ia64_patch_imm64(ip, (u64) fsyscall_table); + ia64_patch_imm64(ip, fsyscall_table); ia64_fc((void *) ip); ++offp; } @@ -189,7 +209,7 @@ patch_fsyscall_table (unsigned long start, unsigned long end) static void __init patch_brl_fsys_bubble_down (unsigned long start, unsigned long end) { - extern char fsys_bubble_down[]; + u64 fsys_bubble_down = (u64)paravirt_get_fsys_bubble_down(); s32 *offp = (s32 *) start; u64 ip; @@ -207,13 +227,13 @@ patch_brl_fsys_bubble_down (unsigned long start, unsigned long end) void __init ia64_patch_gate (void) { -# define START(name) ((unsigned long) __start_gate_##name##_patchlist) -# define END(name) ((unsigned long)__end_gate_##name##_patchlist) +# define START(name) paravirt_get_gate_patchlist(PV_GATE_START_##name) +# define END(name) paravirt_get_gate_patchlist(PV_GATE_END_##name) - patch_fsyscall_table(START(fsyscall), END(fsyscall)); - patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down)); - ia64_patch_vtop(START(vtop), END(vtop)); - ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); + patch_fsyscall_table(START(FSYSCALL), END(FSYSCALL)); + patch_brl_fsys_bubble_down(START(BRL_FSYS_BUBBLE_DOWN), END(BRL_FSYS_BUBBLE_DOWN)); + ia64_patch_vtop(START(VTOP), END(VTOP)); + ia64_patch_mckinley_e9(START(MCKINLEY_E9), END(MCKINLEY_E9)); } void ia64_patch_phys_stack_reg(unsigned long val) @@ -229,7 +249,7 @@ void ia64_patch_phys_stack_reg(unsigned long val) while (offp < end) { ip = (u64) offp + *offp; ia64_patch(ip, mask, imm); - ia64_fc(ip); + ia64_fc((void *)ip); ++offp; } ia64_sync_i(); diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 5c0f408cfd71..8a06dc480594 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -5603,7 +5603,7 @@ pfm_interrupt_handler(int irq, void *arg) * /proc/perfmon interface, for debug only */ -#define PFM_PROC_SHOW_HEADER ((void *)NR_CPUS+1) +#define PFM_PROC_SHOW_HEADER ((void *)nr_cpu_ids+1) static void * pfm_proc_start(struct seq_file *m, loff_t *pos) @@ -5612,7 +5612,7 @@ pfm_proc_start(struct seq_file *m, loff_t *pos) return PFM_PROC_SHOW_HEADER; } - while (*pos <= NR_CPUS) { + while (*pos <= nr_cpu_ids) { if (cpu_online(*pos - 1)) { return (void *)*pos; } diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index ecb9eb78d687..7053c55b7649 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -317,7 +317,7 @@ retry: } n = data->cpu_check; - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { if (cpu_isset(n, data->cpu_event)) { if (!cpu_online(n)) { cpu_clear(n, data->cpu_event); @@ -326,7 +326,7 @@ retry: cpu = n; break; } - if (++n == NR_CPUS) + if (++n == nr_cpu_ids) n = 0; } @@ -337,7 +337,7 @@ retry: /* for next read, start checking at next CPU */ data->cpu_check = cpu; - if (++data->cpu_check == NR_CPUS) + if (++data->cpu_check == nr_cpu_ids) data->cpu_check = 0; snprintf(cmd, sizeof(cmd), "read %d\n", cpu); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 865af27c7737..714066aeda7f 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -52,6 +52,7 @@ #include <asm/meminit.h> #include <asm/page.h> #include <asm/paravirt.h> +#include <asm/paravirt_patch.h> #include <asm/patch.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -537,6 +538,7 @@ setup_arch (char **cmdline_p) paravirt_arch_setup_early(); ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); + paravirt_patch_apply(); *cmdline_p = __va(ia64_boot_param->command_line); strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); @@ -730,10 +732,10 @@ static void * c_start (struct seq_file *m, loff_t *pos) { #ifdef CONFIG_SMP - while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map)) + while (*pos < nr_cpu_ids && !cpu_online(*pos)) ++*pos; #endif - return *pos < NR_CPUS ? cpu_data(*pos) : NULL; + return *pos < nr_cpu_ids ? cpu_data(*pos) : NULL; } static void * @@ -1016,8 +1018,7 @@ cpu_init (void) | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC)); atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; - if (current->mm) - BUG(); + BUG_ON(current->mm); ia64_mmu_init(ia64_imva(cpu_data)); ia64_mca_cpu_init(ia64_imva(cpu_data)); diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index da8f020d82c1..2ea4199d9c57 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -166,11 +166,11 @@ send_IPI_allbutself (int op) * Called with preemption disabled. */ static inline void -send_IPI_mask(cpumask_t mask, int op) +send_IPI_mask(const struct cpumask *mask, int op) { unsigned int cpu; - for_each_cpu_mask(cpu, mask) { + for_each_cpu(cpu, mask) { send_IPI_single(cpu, op); } } @@ -316,7 +316,7 @@ void arch_send_call_function_single_ipi(int cpu) send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE); } -void arch_send_call_function_ipi(cpumask_t mask) +void arch_send_call_function_ipi_mask(const struct cpumask *mask) { send_IPI_mask(mask, IPI_CALL_FUNC); } diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 52290547c85b..7700e23034bb 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -581,14 +581,14 @@ smp_build_cpu_map (void) ia64_cpu_to_sapicid[0] = boot_cpu_id; cpus_clear(cpu_present_map); - cpu_set(0, cpu_present_map); - cpu_set(0, cpu_possible_map); + set_cpu_present(0, true); + set_cpu_possible(0, true); for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) { sapicid = smp_boot_data.cpu_phys_id[i]; if (sapicid == boot_cpu_id) continue; - cpu_set(cpu, cpu_present_map); - cpu_set(cpu, cpu_possible_map); + set_cpu_present(cpu, true); + set_cpu_possible(cpu, true); ia64_cpu_to_sapicid[cpu] = sapicid; cpu++; } @@ -626,12 +626,9 @@ smp_prepare_cpus (unsigned int max_cpus) */ if (!max_cpus) { printk(KERN_INFO "SMP mode deactivated.\n"); - cpus_clear(cpu_online_map); - cpus_clear(cpu_present_map); - cpus_clear(cpu_possible_map); - cpu_set(0, cpu_online_map); - cpu_set(0, cpu_present_map); - cpu_set(0, cpu_possible_map); + init_cpu_online(cpumask_of(0)); + init_cpu_present(cpumask_of(0)); + init_cpu_possible(cpumask_of(0)); return; } } diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index f0ebb342409d..641c8b61c4f1 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -20,6 +20,7 @@ #include <linux/efi.h> #include <linux/timex.h> #include <linux/clocksource.h> +#include <linux/platform_device.h> #include <asm/machvec.h> #include <asm/delay.h> @@ -50,6 +51,15 @@ EXPORT_SYMBOL(last_cli_ip); #endif #ifdef CONFIG_PARAVIRT +/* We need to define a real function for sched_clock, to override the + weak default version */ +unsigned long long sched_clock(void) +{ + return paravirt_sched_clock(); +} +#endif + +#ifdef CONFIG_PARAVIRT static void paravirt_clocksource_resume(void) { @@ -405,6 +415,21 @@ static struct irqaction timer_irqaction = { .name = "timer" }; +static struct platform_device rtc_efi_dev = { + .name = "rtc-efi", + .id = -1, +}; + +static int __init rtc_init(void) +{ + if (platform_device_register(&rtc_efi_dev) < 0) + printk(KERN_ERR "unable to register rtc device...\n"); + + /* not necessarily an error */ + return 0; +} +module_init(rtc_init); + void __init time_init (void) { diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 3765efc5f963..4a95e86b9ac2 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -169,6 +169,30 @@ SECTIONS __end___mckinley_e9_bundles = .; } +#if defined(CONFIG_PARAVIRT) + . = ALIGN(16); + .paravirt_bundles : AT(ADDR(.paravirt_bundles) - LOAD_OFFSET) + { + __start_paravirt_bundles = .; + *(.paravirt_bundles) + __stop_paravirt_bundles = .; + } + . = ALIGN(16); + .paravirt_insts : AT(ADDR(.paravirt_insts) - LOAD_OFFSET) + { + __start_paravirt_insts = .; + *(.paravirt_insts) + __stop_paravirt_insts = .; + } + . = ALIGN(16); + .paravirt_branches : AT(ADDR(.paravirt_branches) - LOAD_OFFSET) + { + __start_paravirt_branches = .; + *(.paravirt_branches) + __stop_paravirt_branches = .; + } +#endif + #if defined(CONFIG_IA64_GENERIC) /* Machine Vector */ . = ALIGN(16); @@ -201,6 +225,12 @@ SECTIONS __start_gate_section = .; *(.data.gate) __stop_gate_section = .; +#ifdef CONFIG_XEN + . = ALIGN(PAGE_SIZE); + __xen_start_gate_section = .; + *(.data.gate.xen) + __xen_stop_gate_section = .; +#endif } . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose * kernel data diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 076b00d1dbff..28af6a731bb8 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -70,7 +70,7 @@ static void kvm_flush_icache(unsigned long start, unsigned long len) int l; for (l = 0; l < (len + 32); l += 32) - ia64_fc(start + l); + ia64_fc((void *)(start + l)); ia64_sync_i(); ia64_srlz_i(); diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c index d4d280505878..a18ee17b9192 100644 --- a/arch/ia64/kvm/vcpu.c +++ b/arch/ia64/kvm/vcpu.c @@ -386,7 +386,7 @@ void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, else *rnat_addr = (*rnat_addr) & (~nat_mask); - ia64_setreg(_IA64_REG_AR_BSPSTORE, bspstore); + ia64_setreg(_IA64_REG_AR_BSPSTORE, (unsigned long)bspstore); ia64_setreg(_IA64_REG_AR_RNAT, rnat); } local_irq_restore(psr); diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c index 38232b37668b..2c2501f13159 100644 --- a/arch/ia64/kvm/vtlb.c +++ b/arch/ia64/kvm/vtlb.c @@ -210,6 +210,7 @@ void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type) phy_pte &= ~PAGE_FLAGS_RV_MASK; psr = ia64_clear_ic(); ia64_itc(type, va, phy_pte, itir_ps(itir)); + paravirt_dv_serialize_data(); ia64_set_psr(psr); } @@ -456,6 +457,7 @@ void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, phy_pte &= ~PAGE_FLAGS_RV_MASK; psr = ia64_clear_ic(); ia64_itc(type, ifa, phy_pte, ps); + paravirt_dv_serialize_data(); ia64_set_psr(psr); } if (!(pte&VTLB_PTE_IO)) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 56e12903973c..c0f3bee69042 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -35,6 +35,7 @@ #include <asm/uaccess.h> #include <asm/unistd.h> #include <asm/mca.h> +#include <asm/paravirt.h> DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -259,6 +260,7 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot) static void __init setup_gate (void) { + void *gate_section; struct page *page; /* @@ -266,10 +268,11 @@ setup_gate (void) * headers etc. and once execute-only page to enable * privilege-promotion via "epc": */ - page = virt_to_page(ia64_imva(__start_gate_section)); + gate_section = paravirt_get_gate_section(); + page = virt_to_page(ia64_imva(gate_section)); put_kernel_page(page, GATE_ADDR, PAGE_READONLY); #ifdef HAVE_BUGGY_SEGREL - page = virt_to_page(ia64_imva(__start_gate_section + PAGE_SIZE)); + page = virt_to_page(ia64_imva(gate_section + PAGE_SIZE)); put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE); #else put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE); @@ -633,8 +636,7 @@ mem_init (void) #endif #ifdef CONFIG_FLATMEM - if (!mem_map) - BUG(); + BUG_ON(!mem_map); max_mapnr = max_low_pfn; #endif @@ -667,8 +669,8 @@ mem_init (void) * code can tell them apart. */ for (i = 0; i < NR_syscalls; ++i) { - extern unsigned long fsyscall_table[NR_syscalls]; extern unsigned long sys_call_table[NR_syscalls]; + unsigned long *fsyscall_table = paravirt_get_fsyscall_table(); if (!fsyscall_table[i] || nolwsys) fsyscall_table[i] = sys_call_table[i] | 1; diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index bd9818a36b47..b9f3d7bbb338 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -309,7 +309,7 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, preempt_disable(); #ifdef CONFIG_SMP - if (mm != current->active_mm || cpus_weight(mm->cpu_vm_mask) != 1) { + if (mm != current->active_mm || cpumask_weight(mm_cpumask(mm)) != 1) { platform_global_tlb_purge(mm, start, end, nbits); preempt_enable(); return; diff --git a/arch/ia64/scripts/pvcheck.sed b/arch/ia64/scripts/pvcheck.sed index ba66ac2e4c60..e59809a3fc01 100644 --- a/arch/ia64/scripts/pvcheck.sed +++ b/arch/ia64/scripts/pvcheck.sed @@ -17,6 +17,7 @@ s/mov.*=.*cr\.iip/.warning \"cr.iip should not used directly\"/g s/mov.*=.*cr\.ivr/.warning \"cr.ivr should not used directly\"/g s/mov.*=[^\.]*psr/.warning \"psr should not used directly\"/g # avoid ar.fpsr s/mov.*=.*ar\.eflags/.warning \"ar.eflags should not used directly\"/g +s/mov.*=.*ar\.itc.*/.warning \"ar.itc should not used directly\"/g s/mov.*cr\.ifa.*=.*/.warning \"cr.ifa should not used directly\"/g s/mov.*cr\.itir.*=.*/.warning \"cr.itir should not used directly\"/g s/mov.*cr\.iha.*=.*/.warning \"cr.iha should not used directly\"/g diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c index 0d4ffa4da1da..57f280dd9def 100644 --- a/arch/ia64/sn/kernel/io_common.c +++ b/arch/ia64/sn/kernel/io_common.c @@ -135,8 +135,7 @@ static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device, } war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL); - if (!war_list) - BUG(); + BUG_ON(!war_list); SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST, nasid, widget, __pa(war_list), 0, 0, 0 ,0); @@ -180,23 +179,20 @@ sn_common_hubdev_init(struct hubdev_info *hubdev) sizeof(struct sn_flush_device_kernel *); hubdev->hdi_flush_nasid_list.widget_p = kzalloc(size, GFP_KERNEL); - if (!hubdev->hdi_flush_nasid_list.widget_p) - BUG(); + BUG_ON(!hubdev->hdi_flush_nasid_list.widget_p); for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) { size = DEV_PER_WIDGET * sizeof(struct sn_flush_device_kernel); sn_flush_device_kernel = kzalloc(size, GFP_KERNEL); - if (!sn_flush_device_kernel) - BUG(); + BUG_ON(!sn_flush_device_kernel); dev_entry = sn_flush_device_kernel; for (device = 0; device < DEV_PER_WIDGET; device++, dev_entry++) { size = sizeof(struct sn_flush_device_common); dev_entry->common = kzalloc(size, GFP_KERNEL); - if (!dev_entry->common) - BUG(); + BUG_ON(!dev_entry->common); if (sn_prom_feature_available(PRF_DEVICE_FLUSH_LIST)) status = sal_get_device_dmaflush_list( hubdev->hdi_nasid, widget, device, @@ -326,8 +322,7 @@ sn_common_bus_fixup(struct pci_bus *bus, */ controller->platform_data = kzalloc(sizeof(struct sn_platform_data), GFP_KERNEL); - if (controller->platform_data == NULL) - BUG(); + BUG_ON(controller->platform_data == NULL); sn_platform_data = (struct sn_platform_data *) controller->platform_data; sn_platform_data->provider_soft = provider_soft; diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index e2eb2da60f96..ee774c366a06 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -128,8 +128,7 @@ sn_legacy_pci_window_fixup(struct pci_controller *controller, { controller->window = kcalloc(2, sizeof(struct pci_window), GFP_KERNEL); - if (controller->window == NULL) - BUG(); + BUG_ON(controller->window == NULL); controller->window[0].offset = legacy_io; controller->window[0].resource.name = "legacy_io"; controller->window[0].resource.flags = IORESOURCE_IO; @@ -168,8 +167,7 @@ sn_pci_window_fixup(struct pci_dev *dev, unsigned int count, idx = controller->windows; new_count = controller->windows + count; new_window = kcalloc(new_count, sizeof(struct pci_window), GFP_KERNEL); - if (new_window == NULL) - BUG(); + BUG_ON(new_window == NULL); if (controller->window) { memcpy(new_window, controller->window, sizeof(struct pci_window) * controller->windows); @@ -222,8 +220,7 @@ sn_io_slot_fixup(struct pci_dev *dev) (u64) __pa(pcidev_info), (u64) __pa(sn_irq_info)); - if (status) - BUG(); /* Cannot get platform pci device information */ + BUG_ON(status); /* Cannot get platform pci device information */ /* Copy over PIO Mapped Addresses */ @@ -307,8 +304,7 @@ sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) prom_bussoft_ptr = __va(prom_bussoft_ptr); controller = kzalloc(sizeof(*controller), GFP_KERNEL); - if (!controller) - BUG(); + BUG_ON(!controller); controller->segment = segment; /* diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 02c5b8a9fb60..e456f062f241 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -732,8 +732,7 @@ void __init build_cnode_tables(void) kl_config_hdr_t *klgraph_header; nasid = cnodeid_to_nasid(node); klgraph_header = ia64_sn_get_klconfig_addr(nasid); - if (klgraph_header == NULL) - BUG(); + BUG_ON(klgraph_header == NULL); brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info); while (brd) { if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) { @@ -750,7 +749,7 @@ nasid_slice_to_cpuid(int nasid, int slice) { long cpu; - for (cpu = 0; cpu < NR_CPUS; cpu++) + for (cpu = 0; cpu < nr_cpu_ids; cpu++) if (cpuid_to_nasid(cpu) == nasid && cpuid_to_slice(cpu) == slice) return cpu; diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index e585f9a2afb9..1176506b2bae 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -133,7 +133,7 @@ sn2_ipi_flush_all_tlb(struct mm_struct *mm) unsigned long itc; itc = ia64_get_itc(); - smp_flush_tlb_cpumask(mm->cpu_vm_mask); + smp_flush_tlb_cpumask(*mm_cpumask(mm)); itc = ia64_get_itc() - itc; __get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc; __get_cpu_var(ptcstats).shub_ipi_flushes++; @@ -182,7 +182,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, nodes_clear(nodes_flushed); i = 0; - for_each_cpu_mask(cpu, mm->cpu_vm_mask) { + for_each_cpu(cpu, mm_cpumask(mm)) { cnode = cpu_to_node(cpu); node_set(cnode, nodes_flushed); lcpu = cpu; @@ -461,7 +461,7 @@ bool sn_cpu_disable_allowed(int cpu) static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset) { - if (*offset < NR_CPUS) + if (*offset < nr_cpu_ids) return offset; return NULL; } @@ -469,7 +469,7 @@ static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset) static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset) { (*offset)++; - if (*offset < NR_CPUS) + if (*offset < nr_cpu_ids) return offset; return NULL; } @@ -491,7 +491,7 @@ static int sn2_ptc_seq_show(struct seq_file *file, void *data) seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt); } - if (cpu < NR_CPUS && cpu_online(cpu)) { + if (cpu < nr_cpu_ids && cpu_online(cpu)) { stat = &per_cpu(ptcstats, cpu); seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, @@ -554,7 +554,7 @@ static int __init sn2_ptc_init(void) proc_sn2_ptc = proc_create(PTC_BASENAME, 0444, NULL, &proc_sn2_ptc_operations); - if (!&proc_sn2_ptc_operations) { + if (!proc_sn2_ptc) { printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME); return -EINVAL; } diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index be339477f906..9e6491cf72bd 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -275,8 +275,7 @@ static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objb /* get it's interconnect topology */ sz = op->ports * sizeof(struct sn_hwperf_port_info); - if (sz > sizeof(ptdata)) - BUG(); + BUG_ON(sz > sizeof(ptdata)); e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, SN_HWPERF_ENUM_PORTS, nodeobj->id, sz, (u64)&ptdata, 0, 0, NULL); @@ -310,8 +309,7 @@ static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objb if (router && (!found_cpu || !found_mem)) { /* search for a node connected to the same router */ sz = router->ports * sizeof(struct sn_hwperf_port_info); - if (sz > sizeof(ptdata)) - BUG(); + BUG_ON(sz > sizeof(ptdata)); e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, SN_HWPERF_ENUM_PORTS, router->id, sz, (u64)&ptdata, 0, 0, NULL); @@ -612,7 +610,7 @@ static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info) op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK; if (cpu != SN_HWPERF_ARG_ANY_CPU) { - if (cpu >= NR_CPUS || !cpu_online(cpu)) { + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { r = -EINVAL; goto out; } diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index 060df4aa9916..c659ad5613a0 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -256,9 +256,7 @@ void sn_dma_flush(u64 addr) hubinfo = (NODEPDA(nasid_to_cnodeid(nasid)))->pdinfo; - if (!hubinfo) { - BUG(); - } + BUG_ON(!hubinfo); flush_nasid_list = &hubinfo->hdi_flush_nasid_list; if (flush_nasid_list->widget_p == NULL) diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile index 0ad0224693d9..e6f4a0a74228 100644 --- a/arch/ia64/xen/Makefile +++ b/arch/ia64/xen/Makefile @@ -3,14 +3,29 @@ # obj-y := hypercall.o xenivt.o xensetup.o xen_pv_ops.o irq_xen.o \ - hypervisor.o xencomm.o xcom_hcall.o grant-table.o time.o suspend.o + hypervisor.o xencomm.o xcom_hcall.o grant-table.o time.o suspend.o \ + gate-data.o obj-$(CONFIG_IA64_GENERIC) += machvec.o +# The gate DSO image is built using a special linker script. +include $(srctree)/arch/ia64/kernel/Makefile.gate + +# tell compiled for xen +CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_XEN +AFLAGS_gate.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN -D__IA64_GATE_PARAVIRTUALIZED_XEN + +# use same file of native. +$(obj)/gate.o: $(src)/../kernel/gate.S FORCE + $(call if_changed_dep,as_o_S) +$(obj)/gate.lds: $(src)/../kernel/gate.lds.S FORCE + $(call if_changed_dep,cpp_lds_S) + + AFLAGS_xenivt.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN # xen multi compile -ASM_PARAVIRT_MULTI_COMPILE_SRCS = ivt.S entry.S +ASM_PARAVIRT_MULTI_COMPILE_SRCS = ivt.S entry.S fsys.S ASM_PARAVIRT_OBJS = $(addprefix xen-,$(ASM_PARAVIRT_MULTI_COMPILE_SRCS:.S=.o)) obj-y += $(ASM_PARAVIRT_OBJS) define paravirtualized_xen diff --git a/arch/ia64/xen/gate-data.S b/arch/ia64/xen/gate-data.S new file mode 100644 index 000000000000..7d4830afc91d --- /dev/null +++ b/arch/ia64/xen/gate-data.S @@ -0,0 +1,3 @@ + .section .data.gate.xen, "aw" + + .incbin "arch/ia64/xen/gate.so" diff --git a/arch/ia64/xen/hypercall.S b/arch/ia64/xen/hypercall.S index 45e02bb64a92..e32dae444dd6 100644 --- a/arch/ia64/xen/hypercall.S +++ b/arch/ia64/xen/hypercall.S @@ -9,6 +9,7 @@ #include <asm/intrinsics.h> #include <asm/xen/privop.h> +#ifdef __INTEL_COMPILER /* * Hypercalls without parameter. */ @@ -72,6 +73,7 @@ GLOBAL_ENTRY(xen_set_rr0_to_rr4) br.ret.sptk.many rp ;; END(xen_set_rr0_to_rr4) +#endif GLOBAL_ENTRY(xen_send_ipi) mov r14=r32 diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c index 68d6204c3f16..fb8332690179 100644 --- a/arch/ia64/xen/time.c +++ b/arch/ia64/xen/time.c @@ -175,10 +175,58 @@ static void xen_itc_jitter_data_reset(void) } while (unlikely(ret != lcycle)); } +/* based on xen_sched_clock() in arch/x86/xen/time.c. */ +/* + * This relies on HAVE_UNSTABLE_SCHED_CLOCK. If it can't be defined, + * something similar logic should be implemented here. + */ +/* + * Xen sched_clock implementation. Returns the number of unstolen + * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED + * states. + */ +static unsigned long long xen_sched_clock(void) +{ + struct vcpu_runstate_info runstate; + + unsigned long long now; + unsigned long long offset; + unsigned long long ret; + + /* + * Ideally sched_clock should be called on a per-cpu basis + * anyway, so preempt should already be disabled, but that's + * not current practice at the moment. + */ + preempt_disable(); + + /* + * both ia64_native_sched_clock() and xen's runstate are + * based on mAR.ITC. So difference of them makes sense. + */ + now = ia64_native_sched_clock(); + + get_runstate_snapshot(&runstate); + + WARN_ON(runstate.state != RUNSTATE_running); + + offset = 0; + if (now > runstate.state_entry_time) + offset = now - runstate.state_entry_time; + ret = runstate.time[RUNSTATE_blocked] + + runstate.time[RUNSTATE_running] + + offset; + + preempt_enable(); + + return ret; +} + struct pv_time_ops xen_time_ops __initdata = { .init_missing_ticks_accounting = xen_init_missing_ticks_accounting, .do_steal_accounting = xen_do_steal_accounting, .clocksource_resume = xen_itc_jitter_data_reset, + .sched_clock = xen_sched_clock, }; /* Called after suspend, to resume time. */ diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c index 936cff3c96e0..5e2270a999fa 100644 --- a/arch/ia64/xen/xen_pv_ops.c +++ b/arch/ia64/xen/xen_pv_ops.c @@ -24,6 +24,7 @@ #include <linux/irq.h> #include <linux/kernel.h> #include <linux/pm.h> +#include <linux/unistd.h> #include <asm/xen/hypervisor.h> #include <asm/xen/xencomm.h> @@ -153,6 +154,13 @@ xen_post_smp_prepare_boot_cpu(void) xen_setup_vcpu_info_placement(); } +#ifdef ASM_SUPPORTED +static unsigned long __init_or_module +xen_patch_bundle(void *sbundle, void *ebundle, unsigned long type); +#endif +static void __init +xen_patch_branch(unsigned long tag, unsigned long type); + static const struct pv_init_ops xen_init_ops __initconst = { .banner = xen_banner, @@ -163,6 +171,53 @@ static const struct pv_init_ops xen_init_ops __initconst = { .arch_setup_nomca = xen_arch_setup_nomca, .post_smp_prepare_boot_cpu = xen_post_smp_prepare_boot_cpu, +#ifdef ASM_SUPPORTED + .patch_bundle = xen_patch_bundle, +#endif + .patch_branch = xen_patch_branch, +}; + +/*************************************************************************** + * pv_fsys_data + * addresses for fsys + */ + +extern unsigned long xen_fsyscall_table[NR_syscalls]; +extern char xen_fsys_bubble_down[]; +struct pv_fsys_data xen_fsys_data __initdata = { + .fsyscall_table = (unsigned long *)xen_fsyscall_table, + .fsys_bubble_down = (void *)xen_fsys_bubble_down, +}; + +/*************************************************************************** + * pv_patchdata + * patchdata addresses + */ + +#define DECLARE(name) \ + extern unsigned long __xen_start_gate_##name##_patchlist[]; \ + extern unsigned long __xen_end_gate_##name##_patchlist[] + +DECLARE(fsyscall); +DECLARE(brl_fsys_bubble_down); +DECLARE(vtop); +DECLARE(mckinley_e9); + +extern unsigned long __xen_start_gate_section[]; + +#define ASSIGN(name) \ + .start_##name##_patchlist = \ + (unsigned long)__xen_start_gate_##name##_patchlist, \ + .end_##name##_patchlist = \ + (unsigned long)__xen_end_gate_##name##_patchlist + +static struct pv_patchdata xen_patchdata __initdata = { + ASSIGN(fsyscall), + ASSIGN(brl_fsys_bubble_down), + ASSIGN(vtop), + ASSIGN(mckinley_e9), + + .gate_section = (void*)__xen_start_gate_section, }; /*************************************************************************** @@ -170,6 +225,76 @@ static const struct pv_init_ops xen_init_ops __initconst = { * intrinsics hooks. */ +#ifndef ASM_SUPPORTED +static void +xen_set_itm_with_offset(unsigned long val) +{ + /* ia64_cpu_local_tick() calls this with interrupt enabled. */ + /* WARN_ON(!irqs_disabled()); */ + xen_set_itm(val - XEN_MAPPEDREGS->itc_offset); +} + +static unsigned long +xen_get_itm_with_offset(void) +{ + /* unused at this moment */ + printk(KERN_DEBUG "%s is called.\n", __func__); + + WARN_ON(!irqs_disabled()); + return ia64_native_getreg(_IA64_REG_CR_ITM) + + XEN_MAPPEDREGS->itc_offset; +} + +/* ia64_set_itc() is only called by + * cpu_init() with ia64_set_itc(0) and ia64_sync_itc(). + * So XEN_MAPPEDRESG->itc_offset cal be considered as almost constant. + */ +static void +xen_set_itc(unsigned long val) +{ + unsigned long mitc; + + WARN_ON(!irqs_disabled()); + mitc = ia64_native_getreg(_IA64_REG_AR_ITC); + XEN_MAPPEDREGS->itc_offset = val - mitc; + XEN_MAPPEDREGS->itc_last = val; +} + +static unsigned long +xen_get_itc(void) +{ + unsigned long res; + unsigned long itc_offset; + unsigned long itc_last; + unsigned long ret_itc_last; + + itc_offset = XEN_MAPPEDREGS->itc_offset; + do { + itc_last = XEN_MAPPEDREGS->itc_last; + res = ia64_native_getreg(_IA64_REG_AR_ITC); + res += itc_offset; + if (itc_last >= res) + res = itc_last + 1; + ret_itc_last = cmpxchg(&XEN_MAPPEDREGS->itc_last, + itc_last, res); + } while (unlikely(ret_itc_last != itc_last)); + return res; + +#if 0 + /* ia64_itc_udelay() calls ia64_get_itc() with interrupt enabled. + Should it be paravirtualized instead? */ + WARN_ON(!irqs_disabled()); + itc_offset = XEN_MAPPEDREGS->itc_offset; + itc_last = XEN_MAPPEDREGS->itc_last; + res = ia64_native_getreg(_IA64_REG_AR_ITC); + res += itc_offset; + if (itc_last >= res) + res = itc_last + 1; + XEN_MAPPEDREGS->itc_last = res; + return res; +#endif +} + static void xen_setreg(int regnum, unsigned long val) { switch (regnum) { @@ -181,11 +306,14 @@ static void xen_setreg(int regnum, unsigned long val) xen_set_eflag(val); break; #endif + case _IA64_REG_AR_ITC: + xen_set_itc(val); + break; case _IA64_REG_CR_TPR: xen_set_tpr(val); break; case _IA64_REG_CR_ITM: - xen_set_itm(val); + xen_set_itm_with_offset(val); break; case _IA64_REG_CR_EOI: xen_eoi(val); @@ -209,6 +337,12 @@ static unsigned long xen_getreg(int regnum) res = xen_get_eflag(); break; #endif + case _IA64_REG_AR_ITC: + res = xen_get_itc(); + break; + case _IA64_REG_CR_ITM: + res = xen_get_itm_with_offset(); + break; case _IA64_REG_CR_IVR: res = xen_get_ivr(); break; @@ -259,8 +393,417 @@ xen_intrin_local_irq_restore(unsigned long mask) else xen_rsm_i(); } +#else +#define __DEFINE_FUNC(name, code) \ + extern const char xen_ ## name ## _direct_start[]; \ + extern const char xen_ ## name ## _direct_end[]; \ + asm (".align 32\n" \ + ".proc xen_" #name "\n" \ + "xen_" #name ":\n" \ + "xen_" #name "_direct_start:\n" \ + code \ + "xen_" #name "_direct_end:\n" \ + "br.cond.sptk.many b6\n" \ + ".endp xen_" #name "\n") + +#define DEFINE_VOID_FUNC0(name, code) \ + extern void \ + xen_ ## name (void); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_VOID_FUNC1(name, code) \ + extern void \ + xen_ ## name (unsigned long arg); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_VOID_FUNC1_VOID(name, code) \ + extern void \ + xen_ ## name (void *arg); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_VOID_FUNC2(name, code) \ + extern void \ + xen_ ## name (unsigned long arg0, \ + unsigned long arg1); \ + __DEFINE_FUNC(name, code) -static const struct pv_cpu_ops xen_cpu_ops __initdata = { +#define DEFINE_FUNC0(name, code) \ + extern unsigned long \ + xen_ ## name (void); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_FUNC1(name, type, code) \ + extern unsigned long \ + xen_ ## name (type arg); \ + __DEFINE_FUNC(name, code) + +#define XEN_PSR_I_ADDR_ADDR (XSI_BASE + XSI_PSR_I_ADDR_OFS) + +/* + * static void xen_set_itm_with_offset(unsigned long val) + * xen_set_itm(val - XEN_MAPPEDREGS->itc_offset); + */ +/* 2 bundles */ +DEFINE_VOID_FUNC1(set_itm_with_offset, + "mov r2 = " __stringify(XSI_BASE) " + " + __stringify(XSI_ITC_OFFSET_OFS) "\n" + ";;\n" + "ld8 r3 = [r2]\n" + ";;\n" + "sub r8 = r8, r3\n" + "break " __stringify(HYPERPRIVOP_SET_ITM) "\n"); + +/* + * static unsigned long xen_get_itm_with_offset(void) + * return ia64_native_getreg(_IA64_REG_CR_ITM) + XEN_MAPPEDREGS->itc_offset; + */ +/* 2 bundles */ +DEFINE_FUNC0(get_itm_with_offset, + "mov r2 = " __stringify(XSI_BASE) " + " + __stringify(XSI_ITC_OFFSET_OFS) "\n" + ";;\n" + "ld8 r3 = [r2]\n" + "mov r8 = cr.itm\n" + ";;\n" + "add r8 = r8, r2\n"); + +/* + * static void xen_set_itc(unsigned long val) + * unsigned long mitc; + * + * WARN_ON(!irqs_disabled()); + * mitc = ia64_native_getreg(_IA64_REG_AR_ITC); + * XEN_MAPPEDREGS->itc_offset = val - mitc; + * XEN_MAPPEDREGS->itc_last = val; + */ +/* 2 bundles */ +DEFINE_VOID_FUNC1(set_itc, + "mov r2 = " __stringify(XSI_BASE) " + " + __stringify(XSI_ITC_LAST_OFS) "\n" + "mov r3 = ar.itc\n" + ";;\n" + "sub r3 = r8, r3\n" + "st8 [r2] = r8, " + __stringify(XSI_ITC_LAST_OFS) " - " + __stringify(XSI_ITC_OFFSET_OFS) "\n" + ";;\n" + "st8 [r2] = r3\n"); + +/* + * static unsigned long xen_get_itc(void) + * unsigned long res; + * unsigned long itc_offset; + * unsigned long itc_last; + * unsigned long ret_itc_last; + * + * itc_offset = XEN_MAPPEDREGS->itc_offset; + * do { + * itc_last = XEN_MAPPEDREGS->itc_last; + * res = ia64_native_getreg(_IA64_REG_AR_ITC); + * res += itc_offset; + * if (itc_last >= res) + * res = itc_last + 1; + * ret_itc_last = cmpxchg(&XEN_MAPPEDREGS->itc_last, + * itc_last, res); + * } while (unlikely(ret_itc_last != itc_last)); + * return res; + */ +/* 5 bundles */ +DEFINE_FUNC0(get_itc, + "mov r2 = " __stringify(XSI_BASE) " + " + __stringify(XSI_ITC_OFFSET_OFS) "\n" + ";;\n" + "ld8 r9 = [r2], " __stringify(XSI_ITC_LAST_OFS) " - " + __stringify(XSI_ITC_OFFSET_OFS) "\n" + /* r9 = itc_offset */ + /* r2 = XSI_ITC_OFFSET */ + "888:\n" + "mov r8 = ar.itc\n" /* res = ar.itc */ + ";;\n" + "ld8 r3 = [r2]\n" /* r3 = itc_last */ + "add r8 = r8, r9\n" /* res = ar.itc + itc_offset */ + ";;\n" + "cmp.gtu p6, p0 = r3, r8\n" + ";;\n" + "(p6) add r8 = 1, r3\n" /* if (itc_last > res) itc_last + 1 */ + ";;\n" + "mov ar.ccv = r8\n" + ";;\n" + "cmpxchg8.acq r10 = [r2], r8, ar.ccv\n" + ";;\n" + "cmp.ne p6, p0 = r10, r3\n" + "(p6) hint @pause\n" + "(p6) br.cond.spnt 888b\n"); + +DEFINE_VOID_FUNC1_VOID(fc, + "break " __stringify(HYPERPRIVOP_FC) "\n"); + +/* + * psr_i_addr_addr = XEN_PSR_I_ADDR_ADDR + * masked_addr = *psr_i_addr_addr + * pending_intr_addr = masked_addr - 1 + * if (val & IA64_PSR_I) { + * masked = *masked_addr + * *masked_addr = 0:xen_set_virtual_psr_i(1) + * compiler barrier + * if (masked) { + * uint8_t pending = *pending_intr_addr; + * if (pending) + * XEN_HYPER_SSM_I + * } + * } else { + * *masked_addr = 1:xen_set_virtual_psr_i(0) + * } + */ +/* 6 bundles */ +DEFINE_VOID_FUNC1(intrin_local_irq_restore, + /* r8 = input value: 0 or IA64_PSR_I + * p6 = (flags & IA64_PSR_I) + * = if clause + * p7 = !(flags & IA64_PSR_I) + * = else clause + */ + "cmp.ne p6, p7 = r8, r0\n" + "mov r9 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n" + ";;\n" + /* r9 = XEN_PSR_I_ADDR */ + "ld8 r9 = [r9]\n" + ";;\n" + + /* r10 = masked previous value */ + "(p6) ld1.acq r10 = [r9]\n" + ";;\n" + + /* p8 = !masked interrupt masked previously? */ + "(p6) cmp.ne.unc p8, p0 = r10, r0\n" + + /* p7 = else clause */ + "(p7) mov r11 = 1\n" + ";;\n" + /* masked = 1 */ + "(p7) st1.rel [r9] = r11\n" + + /* p6 = if clause */ + /* masked = 0 + * r9 = masked_addr - 1 + * = pending_intr_addr + */ + "(p8) st1.rel [r9] = r0, -1\n" + ";;\n" + /* r8 = pending_intr */ + "(p8) ld1.acq r11 = [r9]\n" + ";;\n" + /* p9 = interrupt pending? */ + "(p8) cmp.ne.unc p9, p10 = r11, r0\n" + ";;\n" + "(p10) mf\n" + /* issue hypercall to trigger interrupt */ + "(p9) break " __stringify(HYPERPRIVOP_SSM_I) "\n"); + +DEFINE_VOID_FUNC2(ptcga, + "break " __stringify(HYPERPRIVOP_PTC_GA) "\n"); +DEFINE_VOID_FUNC2(set_rr, + "break " __stringify(HYPERPRIVOP_SET_RR) "\n"); + +/* + * tmp = XEN_MAPPEDREGS->interrupt_mask_addr = XEN_PSR_I_ADDR_ADDR; + * tmp = *tmp + * tmp = *tmp; + * psr_i = tmp? 0: IA64_PSR_I; + */ +/* 4 bundles */ +DEFINE_FUNC0(get_psr_i, + "mov r9 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n" + ";;\n" + "ld8 r9 = [r9]\n" /* r9 = XEN_PSR_I_ADDR */ + "mov r8 = 0\n" /* psr_i = 0 */ + ";;\n" + "ld1.acq r9 = [r9]\n" /* r9 = XEN_PSR_I */ + ";;\n" + "cmp.eq.unc p6, p0 = r9, r0\n" /* p6 = (XEN_PSR_I != 0) */ + ";;\n" + "(p6) mov r8 = " __stringify(1 << IA64_PSR_I_BIT) "\n"); + +DEFINE_FUNC1(thash, unsigned long, + "break " __stringify(HYPERPRIVOP_THASH) "\n"); +DEFINE_FUNC1(get_cpuid, int, + "break " __stringify(HYPERPRIVOP_GET_CPUID) "\n"); +DEFINE_FUNC1(get_pmd, int, + "break " __stringify(HYPERPRIVOP_GET_PMD) "\n"); +DEFINE_FUNC1(get_rr, unsigned long, + "break " __stringify(HYPERPRIVOP_GET_RR) "\n"); + +/* + * void xen_privop_ssm_i(void) + * + * int masked = !xen_get_virtual_psr_i(); + * // masked = *(*XEN_MAPPEDREGS->interrupt_mask_addr) + * xen_set_virtual_psr_i(1) + * // *(*XEN_MAPPEDREGS->interrupt_mask_addr) = 0 + * // compiler barrier + * if (masked) { + * uint8_t* pend_int_addr = + * (uint8_t*)(*XEN_MAPPEDREGS->interrupt_mask_addr) - 1; + * uint8_t pending = *pend_int_addr; + * if (pending) + * XEN_HYPER_SSM_I + * } + */ +/* 4 bundles */ +DEFINE_VOID_FUNC0(ssm_i, + "mov r8 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n" + ";;\n" + "ld8 r8 = [r8]\n" /* r8 = XEN_PSR_I_ADDR */ + ";;\n" + "ld1.acq r9 = [r8]\n" /* r9 = XEN_PSR_I */ + ";;\n" + "st1.rel [r8] = r0, -1\n" /* psr_i = 0. enable interrupt + * r8 = XEN_PSR_I_ADDR - 1 + * = pend_int_addr + */ + "cmp.eq.unc p0, p6 = r9, r0\n"/* p6 = !XEN_PSR_I + * previously interrupt + * masked? + */ + ";;\n" + "(p6) ld1.acq r8 = [r8]\n" /* r8 = xen_pend_int */ + ";;\n" + "(p6) cmp.eq.unc p6, p7 = r8, r0\n" /*interrupt pending?*/ + ";;\n" + /* issue hypercall to get interrupt */ + "(p7) break " __stringify(HYPERPRIVOP_SSM_I) "\n" + ";;\n"); + +/* + * psr_i_addr_addr = XEN_MAPPEDREGS->interrupt_mask_addr + * = XEN_PSR_I_ADDR_ADDR; + * psr_i_addr = *psr_i_addr_addr; + * *psr_i_addr = 1; + */ +/* 2 bundles */ +DEFINE_VOID_FUNC0(rsm_i, + "mov r8 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n" + /* r8 = XEN_PSR_I_ADDR */ + "mov r9 = 1\n" + ";;\n" + "ld8 r8 = [r8]\n" /* r8 = XEN_PSR_I */ + ";;\n" + "st1.rel [r8] = r9\n"); /* XEN_PSR_I = 1 */ + +extern void +xen_set_rr0_to_rr4(unsigned long val0, unsigned long val1, + unsigned long val2, unsigned long val3, + unsigned long val4); +__DEFINE_FUNC(set_rr0_to_rr4, + "break " __stringify(HYPERPRIVOP_SET_RR0_TO_RR4) "\n"); + + +extern unsigned long xen_getreg(int regnum); +#define __DEFINE_GET_REG(id, privop) \ + "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \ + ";;\n" \ + "cmp.eq p6, p0 = r2, r8\n" \ + ";;\n" \ + "(p6) break " __stringify(HYPERPRIVOP_GET_ ## privop) "\n" \ + "(p6) br.cond.sptk.many b6\n" \ + ";;\n" + +__DEFINE_FUNC(getreg, + __DEFINE_GET_REG(PSR, PSR) +#ifdef CONFIG_IA32_SUPPORT + __DEFINE_GET_REG(AR_EFLAG, EFLAG) +#endif + + /* get_itc */ + "mov r2 = " __stringify(_IA64_REG_AR_ITC) "\n" + ";;\n" + "cmp.eq p6, p0 = r2, r8\n" + ";;\n" + "(p6) br.cond.spnt xen_get_itc\n" + ";;\n" + + /* get itm */ + "mov r2 = " __stringify(_IA64_REG_CR_ITM) "\n" + ";;\n" + "cmp.eq p6, p0 = r2, r8\n" + ";;\n" + "(p6) br.cond.spnt xen_get_itm_with_offset\n" + ";;\n" + + __DEFINE_GET_REG(CR_IVR, IVR) + __DEFINE_GET_REG(CR_TPR, TPR) + + /* fall back */ + "movl r2 = ia64_native_getreg_func\n" + ";;\n" + "mov b7 = r2\n" + ";;\n" + "br.cond.sptk.many b7\n"); + +extern void xen_setreg(int regnum, unsigned long val); +#define __DEFINE_SET_REG(id, privop) \ + "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \ + ";;\n" \ + "cmp.eq p6, p0 = r2, r9\n" \ + ";;\n" \ + "(p6) break " __stringify(HYPERPRIVOP_ ## privop) "\n" \ + "(p6) br.cond.sptk.many b6\n" \ + ";;\n" + +__DEFINE_FUNC(setreg, + /* kr0 .. kr 7*/ + /* + * if (_IA64_REG_AR_KR0 <= regnum && + * regnum <= _IA64_REG_AR_KR7) { + * register __index asm ("r8") = regnum - _IA64_REG_AR_KR0 + * register __val asm ("r9") = val + * "break HYPERPRIVOP_SET_KR" + * } + */ + "mov r17 = r9\n" + "mov r2 = " __stringify(_IA64_REG_AR_KR0) "\n" + ";;\n" + "cmp.ge p6, p0 = r9, r2\n" + "sub r17 = r17, r2\n" + ";;\n" + "(p6) cmp.ge.unc p7, p0 = " + __stringify(_IA64_REG_AR_KR7) " - " __stringify(_IA64_REG_AR_KR0) + ", r17\n" + ";;\n" + "(p7) mov r9 = r8\n" + ";;\n" + "(p7) mov r8 = r17\n" + "(p7) break " __stringify(HYPERPRIVOP_SET_KR) "\n" + + /* set itm */ + "mov r2 = " __stringify(_IA64_REG_CR_ITM) "\n" + ";;\n" + "cmp.eq p6, p0 = r2, r8\n" + ";;\n" + "(p6) br.cond.spnt xen_set_itm_with_offset\n" + + /* set itc */ + "mov r2 = " __stringify(_IA64_REG_AR_ITC) "\n" + ";;\n" + "cmp.eq p6, p0 = r2, r8\n" + ";;\n" + "(p6) br.cond.spnt xen_set_itc\n" + +#ifdef CONFIG_IA32_SUPPORT + __DEFINE_SET_REG(AR_EFLAG, SET_EFLAG) +#endif + __DEFINE_SET_REG(CR_TPR, SET_TPR) + __DEFINE_SET_REG(CR_EOI, EOI) + + /* fall back */ + "movl r2 = ia64_native_setreg_func\n" + ";;\n" + "mov b7 = r2\n" + ";;\n" + "br.cond.sptk.many b7\n"); +#endif + +static const struct pv_cpu_ops xen_cpu_ops __initconst = { .fc = xen_fc, .thash = xen_thash, .get_cpuid = xen_get_cpuid, @@ -337,7 +880,7 @@ xen_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op); } -static const struct pv_iosapic_ops xen_iosapic_ops __initconst = { +static struct pv_iosapic_ops xen_iosapic_ops __initdata = { .pcat_compat_init = xen_pcat_compat_init, .__get_irq_chip = xen_iosapic_get_irq_chip, @@ -355,6 +898,8 @@ xen_setup_pv_ops(void) xen_info_init(); pv_info = xen_info; pv_init_ops = xen_init_ops; + pv_fsys_data = xen_fsys_data; + pv_patchdata = xen_patchdata; pv_cpu_ops = xen_cpu_ops; pv_iosapic_ops = xen_iosapic_ops; pv_irq_ops = xen_irq_ops; @@ -362,3 +907,252 @@ xen_setup_pv_ops(void) paravirt_cpu_asm_init(&xen_cpu_asm_switch); } + +#ifdef ASM_SUPPORTED +/*************************************************************************** + * binary pacthing + * pv_init_ops.patch_bundle + */ + +#define DEFINE_FUNC_GETREG(name, privop) \ + DEFINE_FUNC0(get_ ## name, \ + "break "__stringify(HYPERPRIVOP_GET_ ## privop) "\n") + +DEFINE_FUNC_GETREG(psr, PSR); +DEFINE_FUNC_GETREG(eflag, EFLAG); +DEFINE_FUNC_GETREG(ivr, IVR); +DEFINE_FUNC_GETREG(tpr, TPR); + +#define DEFINE_FUNC_SET_KR(n) \ + DEFINE_VOID_FUNC0(set_kr ## n, \ + ";;\n" \ + "mov r9 = r8\n" \ + "mov r8 = " #n "\n" \ + "break " __stringify(HYPERPRIVOP_SET_KR) "\n") + +DEFINE_FUNC_SET_KR(0); +DEFINE_FUNC_SET_KR(1); +DEFINE_FUNC_SET_KR(2); +DEFINE_FUNC_SET_KR(3); +DEFINE_FUNC_SET_KR(4); +DEFINE_FUNC_SET_KR(5); +DEFINE_FUNC_SET_KR(6); +DEFINE_FUNC_SET_KR(7); + +#define __DEFINE_FUNC_SETREG(name, privop) \ + DEFINE_VOID_FUNC0(name, \ + "break "__stringify(HYPERPRIVOP_ ## privop) "\n") + +#define DEFINE_FUNC_SETREG(name, privop) \ + __DEFINE_FUNC_SETREG(set_ ## name, SET_ ## privop) + +DEFINE_FUNC_SETREG(eflag, EFLAG); +DEFINE_FUNC_SETREG(tpr, TPR); +__DEFINE_FUNC_SETREG(eoi, EOI); + +extern const char xen_check_events[]; +extern const char __xen_intrin_local_irq_restore_direct_start[]; +extern const char __xen_intrin_local_irq_restore_direct_end[]; +extern const unsigned long __xen_intrin_local_irq_restore_direct_reloc; + +asm ( + ".align 32\n" + ".proc xen_check_events\n" + "xen_check_events:\n" + /* masked = 0 + * r9 = masked_addr - 1 + * = pending_intr_addr + */ + "st1.rel [r9] = r0, -1\n" + ";;\n" + /* r8 = pending_intr */ + "ld1.acq r11 = [r9]\n" + ";;\n" + /* p9 = interrupt pending? */ + "cmp.ne p9, p10 = r11, r0\n" + ";;\n" + "(p10) mf\n" + /* issue hypercall to trigger interrupt */ + "(p9) break " __stringify(HYPERPRIVOP_SSM_I) "\n" + "br.cond.sptk.many b6\n" + ".endp xen_check_events\n" + "\n" + ".align 32\n" + ".proc __xen_intrin_local_irq_restore_direct\n" + "__xen_intrin_local_irq_restore_direct:\n" + "__xen_intrin_local_irq_restore_direct_start:\n" + "1:\n" + "{\n" + "cmp.ne p6, p7 = r8, r0\n" + "mov r17 = ip\n" /* get ip to calc return address */ + "mov r9 = "__stringify(XEN_PSR_I_ADDR_ADDR) "\n" + ";;\n" + "}\n" + "{\n" + /* r9 = XEN_PSR_I_ADDR */ + "ld8 r9 = [r9]\n" + ";;\n" + /* r10 = masked previous value */ + "(p6) ld1.acq r10 = [r9]\n" + "adds r17 = 1f - 1b, r17\n" /* calculate return address */ + ";;\n" + "}\n" + "{\n" + /* p8 = !masked interrupt masked previously? */ + "(p6) cmp.ne.unc p8, p0 = r10, r0\n" + "\n" + /* p7 = else clause */ + "(p7) mov r11 = 1\n" + ";;\n" + "(p8) mov b6 = r17\n" /* set return address */ + "}\n" + "{\n" + /* masked = 1 */ + "(p7) st1.rel [r9] = r11\n" + "\n" + "[99:]\n" + "(p8) brl.cond.dptk.few xen_check_events\n" + "}\n" + /* pv calling stub is 5 bundles. fill nop to adjust return address */ + "{\n" + "nop 0\n" + "nop 0\n" + "nop 0\n" + "}\n" + "1:\n" + "__xen_intrin_local_irq_restore_direct_end:\n" + ".endp __xen_intrin_local_irq_restore_direct\n" + "\n" + ".align 8\n" + "__xen_intrin_local_irq_restore_direct_reloc:\n" + "data8 99b\n" +); + +static struct paravirt_patch_bundle_elem xen_patch_bundle_elems[] +__initdata_or_module = +{ +#define XEN_PATCH_BUNDLE_ELEM(name, type) \ + { \ + (void*)xen_ ## name ## _direct_start, \ + (void*)xen_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_ ## type, \ + } + + XEN_PATCH_BUNDLE_ELEM(fc, FC), + XEN_PATCH_BUNDLE_ELEM(thash, THASH), + XEN_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID), + XEN_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD), + XEN_PATCH_BUNDLE_ELEM(ptcga, PTCGA), + XEN_PATCH_BUNDLE_ELEM(get_rr, GET_RR), + XEN_PATCH_BUNDLE_ELEM(set_rr, SET_RR), + XEN_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4), + XEN_PATCH_BUNDLE_ELEM(ssm_i, SSM_I), + XEN_PATCH_BUNDLE_ELEM(rsm_i, RSM_I), + XEN_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I), + { + (void*)__xen_intrin_local_irq_restore_direct_start, + (void*)__xen_intrin_local_irq_restore_direct_end, + PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE, + }, + +#define XEN_PATCH_BUNDLE_ELEM_GETREG(name, reg) \ + { \ + xen_get_ ## name ## _direct_start, \ + xen_get_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg, \ + } + + XEN_PATCH_BUNDLE_ELEM_GETREG(psr, PSR), + XEN_PATCH_BUNDLE_ELEM_GETREG(eflag, AR_EFLAG), + + XEN_PATCH_BUNDLE_ELEM_GETREG(ivr, CR_IVR), + XEN_PATCH_BUNDLE_ELEM_GETREG(tpr, CR_TPR), + + XEN_PATCH_BUNDLE_ELEM_GETREG(itc, AR_ITC), + XEN_PATCH_BUNDLE_ELEM_GETREG(itm_with_offset, CR_ITM), + + +#define __XEN_PATCH_BUNDLE_ELEM_SETREG(name, reg) \ + { \ + xen_ ## name ## _direct_start, \ + xen_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg, \ + } + +#define XEN_PATCH_BUNDLE_ELEM_SETREG(name, reg) \ + __XEN_PATCH_BUNDLE_ELEM_SETREG(set_ ## name, reg) + + XEN_PATCH_BUNDLE_ELEM_SETREG(kr0, AR_KR0), + XEN_PATCH_BUNDLE_ELEM_SETREG(kr1, AR_KR1), + XEN_PATCH_BUNDLE_ELEM_SETREG(kr2, AR_KR2), + XEN_PATCH_BUNDLE_ELEM_SETREG(kr3, AR_KR3), + XEN_PATCH_BUNDLE_ELEM_SETREG(kr4, AR_KR4), + XEN_PATCH_BUNDLE_ELEM_SETREG(kr5, AR_KR5), + XEN_PATCH_BUNDLE_ELEM_SETREG(kr6, AR_KR6), + XEN_PATCH_BUNDLE_ELEM_SETREG(kr7, AR_KR7), + + XEN_PATCH_BUNDLE_ELEM_SETREG(eflag, AR_EFLAG), + XEN_PATCH_BUNDLE_ELEM_SETREG(tpr, CR_TPR), + __XEN_PATCH_BUNDLE_ELEM_SETREG(eoi, CR_EOI), + + XEN_PATCH_BUNDLE_ELEM_SETREG(itc, AR_ITC), + XEN_PATCH_BUNDLE_ELEM_SETREG(itm_with_offset, CR_ITM), +}; + +static unsigned long __init_or_module +xen_patch_bundle(void *sbundle, void *ebundle, unsigned long type) +{ + const unsigned long nelems = sizeof(xen_patch_bundle_elems) / + sizeof(xen_patch_bundle_elems[0]); + unsigned long used; + const struct paravirt_patch_bundle_elem *found; + + used = __paravirt_patch_apply_bundle(sbundle, ebundle, type, + xen_patch_bundle_elems, nelems, + &found); + + if (found == NULL) + /* fallback */ + return ia64_native_patch_bundle(sbundle, ebundle, type); + if (used == 0) + return used; + + /* relocation */ + switch (type) { + case PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE: { + unsigned long reloc = + __xen_intrin_local_irq_restore_direct_reloc; + unsigned long reloc_offset = reloc - (unsigned long) + __xen_intrin_local_irq_restore_direct_start; + unsigned long tag = (unsigned long)sbundle + reloc_offset; + paravirt_patch_reloc_brl(tag, xen_check_events); + break; + } + default: + /* nothing */ + break; + } + return used; +} +#endif /* ASM_SUPPOTED */ + +const struct paravirt_patch_branch_target xen_branch_target[] +__initconst = { +#define PARAVIRT_BR_TARGET(name, type) \ + { \ + &xen_ ## name, \ + PARAVIRT_PATCH_TYPE_BR_ ## type, \ + } + PARAVIRT_BR_TARGET(switch_to, SWITCH_TO), + PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL), + PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL), + PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL), +}; + +static void __init +xen_patch_branch(unsigned long tag, unsigned long type) +{ + const unsigned long nelem = + sizeof(xen_branch_target) / sizeof(xen_branch_target[0]); + __paravirt_patch_apply_branch(tag, type, xen_branch_target, nelem); +} diff --git a/arch/mips/include/asm/mach-bcm47xx/gpio.h b/arch/mips/include/asm/mach-bcm47xx/gpio.h index d8ff4cd89ab5..1784fde2e28f 100644 --- a/arch/mips/include/asm/mach-bcm47xx/gpio.h +++ b/arch/mips/include/asm/mach-bcm47xx/gpio.h @@ -31,24 +31,28 @@ static inline void gpio_set_value(unsigned gpio, int value) static inline int gpio_direction_input(unsigned gpio) { - return ssb_gpio_outen(&ssb_bcm47xx, 1 << gpio, 0); + ssb_gpio_outen(&ssb_bcm47xx, 1 << gpio, 0); + return 0; } static inline int gpio_direction_output(unsigned gpio, int value) { - return ssb_gpio_outen(&ssb_bcm47xx, 1 << gpio, 1 << gpio); + ssb_gpio_outen(&ssb_bcm47xx, 1 << gpio, 1 << gpio); + return 0; } -static int gpio_intmask(unsigned gpio, int value) +static inline int gpio_intmask(unsigned gpio, int value) { - return ssb_gpio_intmask(&ssb_bcm47xx, 1 << gpio, - value ? 1 << gpio : 0); + ssb_gpio_intmask(&ssb_bcm47xx, 1 << gpio, + value ? 1 << gpio : 0); + return 0; } -static int gpio_polarity(unsigned gpio, int value) +static inline int gpio_polarity(unsigned gpio, int value) { - return ssb_gpio_polarity(&ssb_bcm47xx, 1 << gpio, - value ? 1 << gpio : 0); + ssb_gpio_polarity(&ssb_bcm47xx, 1 << gpio, + value ? 1 << gpio : 0); + return 0; } diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index 060d28dca8a8..4481656d1065 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -42,6 +42,7 @@ void *__kmap_atomic(struct page *page, enum km_type type) if (!PageHighMem(page)) return page_address(page); + debug_kmap_atomic(type); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); #ifdef CONFIG_DEBUG_HIGHMEM @@ -88,6 +89,7 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) pagefault_disable(); + debug_kmap_atomic(type); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 9d46c43a4152..e75cae6072c5 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -216,17 +216,14 @@ void __init start_cpu_itimer(void) per_cpu(cpu_data, cpu).it_value = next_tick; } -struct platform_device rtc_parisc_dev = { +static struct platform_device rtc_parisc_dev = { .name = "rtc-parisc", .id = -1, }; static int __init rtc_init(void) { - int ret; - - ret = platform_device_register(&rtc_parisc_dev); - if (ret < 0) + if (platform_device_register(&rtc_parisc_dev) < 0) printk(KERN_ERR "unable to register rtc device...\n"); /* not necessarily an error */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ad6b1c084fe3..45192dce65c4 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -228,6 +228,9 @@ config PPC_OF_PLATFORM_PCI depends on PPC64 # not supported on 32 bits yet default n +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 22091bbfdc9b..6aa0b5e087cd 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -30,6 +30,7 @@ config DEBUG_STACK_USAGE config DEBUG_PAGEALLOC bool "Debug page memory allocations" depends on DEBUG_KERNEL && !HIBERNATION + depends on ARCH_SUPPORTS_DEBUG_PAGEALLOC help Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts index dea30910c136..4319bd70a580 100644 --- a/arch/powerpc/boot/dts/mpc832x_rdb.dts +++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts @@ -152,10 +152,21 @@ }; par_io@1400 { + #address-cells = <1>; + #size-cells = <1>; reg = <0x1400 0x100>; + ranges = <3 0x1448 0x18>; + compatible = "fsl,mpc8323-qe-pario"; device_type = "par_io"; num-ports = <7>; + qe_pio_d: gpio-controller@1448 { + #gpio-cells = <2>; + compatible = "fsl,mpc8323-qe-pario-bank"; + reg = <3 0x18>; + gpio-controller; + }; + ucc2pio:ucc_pin@02 { pio-map = < /* port pin dir open_drain assignment has_irq */ @@ -225,12 +236,25 @@ }; spi@4c0 { + #address-cells = <1>; + #size-cells = <0>; cell-index = <0>; compatible = "fsl,spi"; reg = <0x4c0 0x40>; interrupts = <2>; interrupt-parent = <&qeic>; + gpios = <&qe_pio_d 13 0>; mode = "cpu-qe"; + + mmc-slot@0 { + compatible = "fsl,mpc8323rdb-mmc-slot", + "mmc-spi-slot"; + reg = <0>; + gpios = <&qe_pio_d 14 1 + &qe_pio_d 15 0>; + voltage-ranges = <3300 3300>; + spi-max-frequency = <50000000>; + }; }; spi@500 { diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index 545028f86488..684a73f4324f 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -24,6 +24,7 @@ #include <linux/init.h> #include <linux/interrupt.h> +#include <linux/highmem.h> #include <asm/kmap_types.h> #include <asm/tlbflush.h> #include <asm/page.h> @@ -94,6 +95,7 @@ static inline void *kmap_atomic_prot(struct page *page, enum km_type type, pgpro if (!PageHighMem(page)) return page_address(page); + debug_kmap_atomic(type); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); #ifdef CONFIG_DEBUG_HIGHMEM diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 3548159a1beb..ba17d5d90a49 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -114,6 +114,10 @@ extern int pci_domain_nr(struct pci_bus *bus); /* Decide whether to display the domain number in /proc */ extern int pci_proc_domain(struct pci_bus *bus); +/* MSI arch hooks */ +#define arch_setup_msi_irqs arch_setup_msi_irqs +#define arch_teardown_msi_irqs arch_teardown_msi_irqs +#define arch_msi_check_device arch_msi_check_device struct vm_area_struct; /* Map a range of PCI memory or I/O space for a device into user space */ diff --git a/arch/powerpc/include/asm/suspend.h b/arch/powerpc/include/asm/suspend.h index cbf2c9404c37..c6efc3466aa6 100644 --- a/arch/powerpc/include/asm/suspend.h +++ b/arch/powerpc/include/asm/suspend.h @@ -3,7 +3,4 @@ static inline int arch_prepare_suspend(void) { return 0; } -void save_processor_state(void); -void restore_processor_state(void); - #endif /* __ASM_POWERPC_SUSPEND_H */ diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index 3bb7d3dd28be..8bbc12d20f5c 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/msi.h> +#include <linux/pci.h> #include <asm/machdep.h> @@ -19,6 +20,10 @@ int arch_msi_check_device(struct pci_dev* dev, int nvec, int type) return -ENOSYS; } + /* PowerPC doesn't support multiple MSI yet */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + if (ppc_md.msi_check_device) { pr_debug("msi: Using platform check routine.\n"); return ppc_md.msi_check_device(dev, nvec, type); diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index 2a1295f19832..567ded7c3b9b 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -20,6 +20,7 @@ #include <linux/spi/mmc_spi.h> #include <linux/mmc/host.h> #include <linux/of_platform.h> +#include <linux/fsl_devices.h> #include <asm/time.h> #include <asm/ipic.h> @@ -39,16 +40,116 @@ #endif #ifdef CONFIG_QUICC_ENGINE -static void mpc83xx_spi_activate_cs(u8 cs, u8 polarity) +static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk, + struct spi_board_info *board_infos, + unsigned int num_board_infos, + void (*cs_control)(struct spi_device *dev, + bool on)) { - pr_debug("%s %d %d\n", __func__, cs, polarity); - par_io_data_set(3, 13, polarity); + struct device_node *np; + unsigned int i = 0; + + for_each_compatible_node(np, type, compatible) { + int ret; + unsigned int j; + const void *prop; + struct resource res[2]; + struct platform_device *pdev; + struct fsl_spi_platform_data pdata = { + .cs_control = cs_control, + }; + + memset(res, 0, sizeof(res)); + + pdata.sysclk = sysclk; + + prop = of_get_property(np, "reg", NULL); + if (!prop) + goto err; + pdata.bus_num = *(u32 *)prop; + + prop = of_get_property(np, "cell-index", NULL); + if (prop) + i = *(u32 *)prop; + + prop = of_get_property(np, "mode", NULL); + if (prop && !strcmp(prop, "cpu-qe")) + pdata.qe_mode = 1; + + for (j = 0; j < num_board_infos; j++) { + if (board_infos[j].bus_num == pdata.bus_num) + pdata.max_chipselect++; + } + + if (!pdata.max_chipselect) + continue; + + ret = of_address_to_resource(np, 0, &res[0]); + if (ret) + goto err; + + ret = of_irq_to_resource(np, 0, &res[1]); + if (ret == NO_IRQ) + goto err; + + pdev = platform_device_alloc("mpc83xx_spi", i); + if (!pdev) + goto err; + + ret = platform_device_add_data(pdev, &pdata, sizeof(pdata)); + if (ret) + goto unreg; + + ret = platform_device_add_resources(pdev, res, + ARRAY_SIZE(res)); + if (ret) + goto unreg; + + ret = platform_device_add(pdev); + if (ret) + goto unreg; + + goto next; +unreg: + platform_device_del(pdev); +err: + pr_err("%s: registration failed\n", np->full_name); +next: + i++; + } + + return i; } -static void mpc83xx_spi_deactivate_cs(u8 cs, u8 polarity) +static int __init fsl_spi_init(struct spi_board_info *board_infos, + unsigned int num_board_infos, + void (*cs_control)(struct spi_device *spi, + bool on)) { - pr_debug("%s %d %d\n", __func__, cs, polarity); - par_io_data_set(3, 13, !polarity); + u32 sysclk = -1; + int ret; + + /* SPI controller is either clocked from QE or SoC clock */ + sysclk = get_brgfreq(); + if (sysclk == -1) { + sysclk = fsl_get_sys_freq(); + if (sysclk == -1) + return -ENODEV; + } + + ret = of_fsl_spi_probe(NULL, "fsl,spi", sysclk, board_infos, + num_board_infos, cs_control); + if (!ret) + of_fsl_spi_probe("spi", "fsl_spi", sysclk, board_infos, + num_board_infos, cs_control); + + return spi_register_board_info(board_infos, num_board_infos); +} + +static void mpc83xx_spi_cs_control(struct spi_device *spi, bool on) +{ + pr_debug("%s %d %d\n", __func__, spi->chip_select, on); + par_io_data_set(3, 13, on); } static struct mmc_spi_platform_data mpc832x_mmc_pdata = { @@ -74,9 +175,13 @@ static int __init mpc832x_spi_init(void) par_io_config_pin(3, 14, 2, 0, 0, 0); /* SD_INSERT, I */ par_io_config_pin(3, 15, 2, 0, 0, 0); /* SD_PROTECT,I */ - return fsl_spi_init(&mpc832x_spi_boardinfo, 1, - mpc83xx_spi_activate_cs, - mpc83xx_spi_deactivate_cs); + /* + * Don't bother with legacy stuff when device tree contains + * mmc-spi-slot node. + */ + if (of_find_compatible_node(NULL, NULL, "mmc-spi-slot")) + return 0; + return fsl_spi_init(&mpc832x_spi_boardinfo, 1, mpc83xx_spi_cs_control); } machine_device_initcall(mpc832x_rdb, mpc832x_spi_init); #endif /* CONFIG_QUICC_ENGINE */ diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index a01c89d3f9bd..afe8dbc964aa 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -417,115 +417,6 @@ err: arch_initcall(fsl_usb_of_init); -static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk, - struct spi_board_info *board_infos, - unsigned int num_board_infos, - void (*activate_cs)(u8 cs, u8 polarity), - void (*deactivate_cs)(u8 cs, u8 polarity)) -{ - struct device_node *np; - unsigned int i = 0; - - for_each_compatible_node(np, type, compatible) { - int ret; - unsigned int j; - const void *prop; - struct resource res[2]; - struct platform_device *pdev; - struct fsl_spi_platform_data pdata = { - .activate_cs = activate_cs, - .deactivate_cs = deactivate_cs, - }; - - memset(res, 0, sizeof(res)); - - pdata.sysclk = sysclk; - - prop = of_get_property(np, "reg", NULL); - if (!prop) - goto err; - pdata.bus_num = *(u32 *)prop; - - prop = of_get_property(np, "cell-index", NULL); - if (prop) - i = *(u32 *)prop; - - prop = of_get_property(np, "mode", NULL); - if (prop && !strcmp(prop, "cpu-qe")) - pdata.qe_mode = 1; - - for (j = 0; j < num_board_infos; j++) { - if (board_infos[j].bus_num == pdata.bus_num) - pdata.max_chipselect++; - } - - if (!pdata.max_chipselect) - continue; - - ret = of_address_to_resource(np, 0, &res[0]); - if (ret) - goto err; - - ret = of_irq_to_resource(np, 0, &res[1]); - if (ret == NO_IRQ) - goto err; - - pdev = platform_device_alloc("mpc83xx_spi", i); - if (!pdev) - goto err; - - ret = platform_device_add_data(pdev, &pdata, sizeof(pdata)); - if (ret) - goto unreg; - - ret = platform_device_add_resources(pdev, res, - ARRAY_SIZE(res)); - if (ret) - goto unreg; - - ret = platform_device_add(pdev); - if (ret) - goto unreg; - - goto next; -unreg: - platform_device_del(pdev); -err: - pr_err("%s: registration failed\n", np->full_name); -next: - i++; - } - - return i; -} - -int __init fsl_spi_init(struct spi_board_info *board_infos, - unsigned int num_board_infos, - void (*activate_cs)(u8 cs, u8 polarity), - void (*deactivate_cs)(u8 cs, u8 polarity)) -{ - u32 sysclk = -1; - int ret; - -#ifdef CONFIG_QUICC_ENGINE - /* SPI controller is either clocked from QE or SoC clock */ - sysclk = get_brgfreq(); -#endif - if (sysclk == -1) { - sysclk = fsl_get_sys_freq(); - if (sysclk == -1) - return -ENODEV; - } - - ret = of_fsl_spi_probe(NULL, "fsl,spi", sysclk, board_infos, - num_board_infos, activate_cs, deactivate_cs); - if (!ret) - of_fsl_spi_probe("spi", "fsl_spi", sysclk, board_infos, - num_board_infos, activate_cs, deactivate_cs); - - return spi_register_board_info(board_infos, num_board_infos); -} - #if defined(CONFIG_PPC_85xx) || defined(CONFIG_PPC_86xx) static __be32 __iomem *rstcr; diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h index 9c744e4285a0..42381bb6cd51 100644 --- a/arch/powerpc/sysdev/fsl_soc.h +++ b/arch/powerpc/sysdev/fsl_soc.h @@ -4,6 +4,8 @@ #include <asm/mmu.h> +struct spi_device; + extern phys_addr_t get_immrbase(void); #if defined(CONFIG_CPM2) || defined(CONFIG_QUICC_ENGINE) || defined(CONFIG_8xx) extern u32 get_brgfreq(void); @@ -17,11 +19,6 @@ extern u32 fsl_get_sys_freq(void); struct spi_board_info; struct device_node; -extern int fsl_spi_init(struct spi_board_info *board_infos, - unsigned int num_board_infos, - void (*activate_cs)(u8 cs, u8 polarity), - void (*deactivate_cs)(u8 cs, u8 polarity)); - extern void fsl_rstcr_restart(char *cmd); #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2a8af5e16345..dcb667c4375a 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -72,6 +72,9 @@ config PGSTE config VIRT_CPU_ACCOUNTING def_bool y +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + mainmenu "Linux Kernel Configuration" config S390 diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 4599fa06bd82..7e297a3cde34 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -9,6 +9,7 @@ source "lib/Kconfig.debug" config DEBUG_PAGEALLOC bool "Debug page memory allocations" depends on DEBUG_KERNEL + depends on ARCH_SUPPORTS_DEBUG_PAGEALLOC help Unmap pages from the kernel linear mapping after free_pages(). This results in a slowdown, but helps to find certain types of diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index b1e892a43816..704dd396257b 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -12,6 +12,8 @@ #include <linux/types.h> #include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/slab.h> #include <linux/string.h> #include <linux/vmalloc.h> #include <asm/ebcdic.h> diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 6dccb071aec3..619bf94b11f1 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -456,6 +456,8 @@ struct ciw { #define CIO_OPER 0x0004 /* Sick revalidation of device. */ #define CIO_REVALIDATE 0x0008 +/* Device did not respond in time. */ +#define CIO_BOXED 0x0010 /** * struct ccw_dev_id - unique identifier for ccw devices diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index c3ea215334f6..cc12cd48bbc5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -124,6 +124,9 @@ config ARCH_NO_VIRT_TO_BUS config OF def_bool y +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y if SPARC64 + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug index b8a15e271bfa..d001b42041a5 100644 --- a/arch/sparc/Kconfig.debug +++ b/arch/sparc/Kconfig.debug @@ -24,7 +24,8 @@ config STACK_DEBUG config DEBUG_PAGEALLOC bool "Debug page memory allocations" - depends on SPARC64 && DEBUG_KERNEL && !HIBERNATION + depends on DEBUG_KERNEL && !HIBERNATION + depends on ARCH_SUPPORTS_DEBUG_PAGEALLOC help Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 752d0c9fb544..7916feba6e4a 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c @@ -39,6 +39,7 @@ void *kmap_atomic(struct page *page, enum km_type type) if (!PageHighMem(page)) return page_address(page); + debug_kmap_atomic(type); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); diff --git a/arch/um/drivers/pcap_user.h b/arch/um/drivers/pcap_user.h index 96b80b565eeb..d8ba6153f912 100644 --- a/arch/um/drivers/pcap_user.h +++ b/arch/um/drivers/pcap_user.h @@ -19,13 +19,3 @@ extern const struct net_user_info pcap_user_info; extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri); -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/drivers/port.h b/arch/um/drivers/port.h index 9117609a575d..372a80c0556a 100644 --- a/arch/um/drivers/port.h +++ b/arch/um/drivers/port.h @@ -18,13 +18,3 @@ extern void port_remove_dev(void *d); #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/drivers/ssl.h b/arch/um/drivers/ssl.h index 98412aa66607..314d17725ce6 100644 --- a/arch/um/drivers/ssl.h +++ b/arch/um/drivers/ssl.h @@ -11,13 +11,3 @@ extern void ssl_receive_char(int line, char ch); #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/drivers/stdio_console.h b/arch/um/drivers/stdio_console.h index 505a3d5bea5e..6d8275f71fd4 100644 --- a/arch/um/drivers/stdio_console.h +++ b/arch/um/drivers/stdio_console.h @@ -9,13 +9,3 @@ extern void save_console_flags(void); #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 0a868118cf06..d42f826a8ab9 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -17,7 +17,6 @@ * James McMechan */ -#define MAJOR_NR UBD_MAJOR #define UBD_SHIFT 4 #include "linux/kernel.h" @@ -115,7 +114,7 @@ static struct block_device_operations ubd_blops = { }; /* Protected by ubd_lock */ -static int fake_major = MAJOR_NR; +static int fake_major = UBD_MAJOR; static struct gendisk *ubd_gendisk[MAX_DEV]; static struct gendisk *fake_gendisk[MAX_DEV]; @@ -299,7 +298,7 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out) } mutex_lock(&ubd_lock); - if(fake_major != MAJOR_NR){ + if (fake_major != UBD_MAJOR) { *error_out = "Can't assign a fake major twice"; goto out1; } @@ -818,13 +817,13 @@ static int ubd_disk_register(int major, u64 size, int unit, disk->first_minor = unit << UBD_SHIFT; disk->fops = &ubd_blops; set_capacity(disk, size / 512); - if(major == MAJOR_NR) + if (major == UBD_MAJOR) sprintf(disk->disk_name, "ubd%c", 'a' + unit); else sprintf(disk->disk_name, "ubd_fake%d", unit); /* sysfs register (not for ide fake devices) */ - if (major == MAJOR_NR) { + if (major == UBD_MAJOR) { ubd_devs[unit].pdev.id = unit; ubd_devs[unit].pdev.name = DRIVER_NAME; ubd_devs[unit].pdev.dev.release = ubd_device_release; @@ -871,13 +870,13 @@ static int ubd_add(int n, char **error_out) ubd_dev->queue->queuedata = ubd_dev; blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG); - err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]); + err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); if(err){ *error_out = "Failed to register device"; goto out_cleanup; } - if(fake_major != MAJOR_NR) + if (fake_major != UBD_MAJOR) ubd_disk_register(fake_major, ubd_dev->size, n, &fake_gendisk[n]); @@ -1059,10 +1058,10 @@ static int __init ubd_init(void) char *error; int i, err; - if (register_blkdev(MAJOR_NR, "ubd")) + if (register_blkdev(UBD_MAJOR, "ubd")) return -1; - if (fake_major != MAJOR_NR) { + if (fake_major != UBD_MAJOR) { char name[sizeof("ubd_nnn\0")]; snprintf(name, sizeof(name), "ubd_%d", fake_major); diff --git a/arch/um/drivers/xterm.h b/arch/um/drivers/xterm.h index f33a6e77b186..56b9c4aba423 100644 --- a/arch/um/drivers/xterm.h +++ b/arch/um/drivers/xterm.h @@ -10,13 +10,3 @@ extern int xterm_fd(int socket, int *pid_out); #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/asm/irq_vectors.h b/arch/um/include/asm/irq_vectors.h index 62ddba6fc733..272a81e0ce14 100644 --- a/arch/um/include/asm/irq_vectors.h +++ b/arch/um/include/asm/irq_vectors.h @@ -8,13 +8,3 @@ #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h index 2cf35c21d694..cf259de51531 100644 --- a/arch/um/include/asm/mmu.h +++ b/arch/um/include/asm/mmu.h @@ -10,13 +10,3 @@ #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/asm/pda.h b/arch/um/include/asm/pda.h index 0d8bf33ffd42..ddcd774fc2a0 100644 --- a/arch/um/include/asm/pda.h +++ b/arch/um/include/asm/pda.h @@ -19,13 +19,3 @@ extern struct foo me; #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index 9062a6e72241..718984359f8c 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -60,13 +60,3 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index 0446f456b428..084de4a9fc70 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -134,13 +134,3 @@ static inline pmd_t pfn_pmd(pfn_t page_nr, pgprot_t pgprot) #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/shared/frame_kern.h b/arch/um/include/shared/frame_kern.h index ce9514f57211..76078490c258 100644 --- a/arch/um/include/shared/frame_kern.h +++ b/arch/um/include/shared/frame_kern.h @@ -20,13 +20,3 @@ extern int setup_signal_stack_si(unsigned long stack_top, int sig, #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/shared/initrd.h b/arch/um/include/shared/initrd.h index 439b9a814985..22673bcc273d 100644 --- a/arch/um/include/shared/initrd.h +++ b/arch/um/include/shared/initrd.h @@ -10,13 +10,3 @@ extern int load_initrd(char *filename, void *buf, int size); #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/shared/irq_kern.h b/arch/um/include/shared/irq_kern.h index fba3895274f9..b05d22f3d84e 100644 --- a/arch/um/include/shared/irq_kern.h +++ b/arch/um/include/shared/irq_kern.h @@ -16,13 +16,3 @@ extern int um_request_irq(unsigned int irq, int fd, int type, #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/shared/mem_kern.h b/arch/um/include/shared/mem_kern.h index cb7e196d366b..69be0fd0ce4b 100644 --- a/arch/um/include/shared/mem_kern.h +++ b/arch/um/include/shared/mem_kern.h @@ -18,13 +18,3 @@ extern void register_remapper(struct remapper *info); #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/shared/ubd_user.h b/arch/um/include/shared/ubd_user.h index bb66517f0739..3845051f1b10 100644 --- a/arch/um/include/shared/ubd_user.h +++ b/arch/um/include/shared/ubd_user.h @@ -14,13 +14,3 @@ extern int kernel_fd; #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index 499e5e95e609..388ec0a3ea9b 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -28,7 +28,7 @@ $(obj)/config.tmp: $(objtree)/.config FORCE $(call if_changed,quote1) quiet_cmd_quote1 = QUOTE $@ - cmd_quote1 = sed -e 's/"/\\"/g' -e 's/^/"/' -e 's/$$/\\n"/' \ + cmd_quote1 = sed -e 's/"/\\"/g' -e 's/^/"/' -e 's/$$/\\n",/' \ $< > $@ $(obj)/config.c: $(src)/config.c.in $(obj)/config.tmp FORCE @@ -36,9 +36,9 @@ $(obj)/config.c: $(src)/config.c.in $(obj)/config.tmp FORCE quiet_cmd_quote2 = QUOTE $@ cmd_quote2 = sed -e '/CONFIG/{' \ - -e 's/"CONFIG"\;/""/' \ + -e 's/"CONFIG"//' \ -e 'r $(obj)/config.tmp' \ -e 'a \' \ - -e '""\;' \ + -e '""' \ -e '}' \ $< > $@ diff --git a/arch/um/kernel/config.c.in b/arch/um/kernel/config.c.in index c062cbfe386e..b7a43feafde7 100644 --- a/arch/um/kernel/config.c.in +++ b/arch/um/kernel/config.c.in @@ -7,11 +7,15 @@ #include <stdlib.h> #include "init.h" -static __initdata char *config = "CONFIG"; +static __initdata const char *config[] = { +"CONFIG" +}; static int __init print_config(char *line, int *add) { - printf("%s", config); + int i; + for (i = 0; i < sizeof(config)/sizeof(config[0]); i++) + printf("%s", config[i]); exit(0); } @@ -20,13 +24,3 @@ __uml_setup("--showconfig", print_config, " Prints the config file that this UML binary was generated from.\n\n" ); -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 183db26d01bf..02ee9adff54a 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -244,7 +244,7 @@ static void __init check_sysemu(void) if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *) PTRACE_O_TRACESYSGOOD) < 0)) - fatal_perror("check_ptrace: PTRACE_OLDSETOPTIONS failed"); + fatal_perror("check_sysemu: PTRACE_OLDSETOPTIONS failed"); while (1) { count++; @@ -252,12 +252,12 @@ static void __init check_sysemu(void) goto fail; CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); if (n < 0) - fatal_perror("check_ptrace : wait failed"); + fatal_perror("check_sysemu: wait failed"); if (WIFSTOPPED(status) && (WSTOPSIG(status) == (SIGTRAP|0x80))) { if (!count) { - non_fatal("check_ptrace : SYSEMU_SINGLESTEP " + non_fatal("check_sysemu: SYSEMU_SINGLESTEP " "doesn't singlestep"); goto fail; } @@ -271,7 +271,7 @@ static void __init check_sysemu(void) else if (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP)) count++; else { - non_fatal("check_ptrace : expected SIGTRAP or " + non_fatal("check_sysemu: expected SIGTRAP or " "(SIGTRAP | 0x80), got status = %d\n", status); goto fail; diff --git a/arch/um/sys-i386/asm/archparam.h b/arch/um/sys-i386/asm/archparam.h index 93fd723344e5..2a18a884ca1b 100644 --- a/arch/um/sys-i386/asm/archparam.h +++ b/arch/um/sys-i386/asm/archparam.h @@ -14,13 +14,3 @@ #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-i386/shared/sysdep/checksum.h b/arch/um/sys-i386/shared/sysdep/checksum.h index 0cb4645cbeb8..ed47445f3905 100644 --- a/arch/um/sys-i386/shared/sysdep/checksum.h +++ b/arch/um/sys-i386/shared/sysdep/checksum.h @@ -199,13 +199,3 @@ static __inline__ __wsum csum_and_copy_to_user(const void *src, #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-ia64/sysdep/ptrace.h b/arch/um/sys-ia64/sysdep/ptrace.h index 42dd8fb6f2f9..0f0f4e6fd334 100644 --- a/arch/um/sys-ia64/sysdep/ptrace.h +++ b/arch/um/sys-ia64/sysdep/ptrace.h @@ -14,13 +14,3 @@ struct sys_pt_regs { #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-ia64/sysdep/sigcontext.h b/arch/um/sys-ia64/sysdep/sigcontext.h index f15fb25260ba..76b43161e779 100644 --- a/arch/um/sys-ia64/sysdep/sigcontext.h +++ b/arch/um/sys-ia64/sysdep/sigcontext.h @@ -8,13 +8,3 @@ #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-ia64/sysdep/syscalls.h b/arch/um/sys-ia64/sysdep/syscalls.h index 4a1f46ef1ebc..5f6700c41558 100644 --- a/arch/um/sys-ia64/sysdep/syscalls.h +++ b/arch/um/sys-ia64/sysdep/syscalls.h @@ -8,13 +8,3 @@ #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-ppc/miscthings.c b/arch/um/sys-ppc/miscthings.c index 373061c50129..1c11aed9c719 100644 --- a/arch/um/sys-ppc/miscthings.c +++ b/arch/um/sys-ppc/miscthings.c @@ -40,14 +40,3 @@ void shove_aux_table(unsigned long sp) } /* END stuff taken from arch/ppc/kernel/process.c */ - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-ppc/ptrace.c b/arch/um/sys-ppc/ptrace.c index 8e71b47f2b8e..66ef155248f1 100644 --- a/arch/um/sys-ppc/ptrace.c +++ b/arch/um/sys-ppc/ptrace.c @@ -56,13 +56,3 @@ int peek_user(struct task_struct *child, long addr, long data) return put_user(tmp, (unsigned long *) data); } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-ppc/ptrace_user.c b/arch/um/sys-ppc/ptrace_user.c index ff0b9c077a13..224d2403c37b 100644 --- a/arch/um/sys-ppc/ptrace_user.c +++ b/arch/um/sys-ppc/ptrace_user.c @@ -27,13 +27,3 @@ int ptrace_setregs(long pid, unsigned long *regs_in) } return 0; } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-ppc/shared/sysdep/ptrace.h b/arch/um/sys-ppc/shared/sysdep/ptrace.h index df2397dba3e5..0e3230e937e1 100644 --- a/arch/um/sys-ppc/shared/sysdep/ptrace.h +++ b/arch/um/sys-ppc/shared/sysdep/ptrace.h @@ -91,13 +91,3 @@ extern void shove_aux_table(unsigned long sp); #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-ppc/shared/sysdep/sigcontext.h b/arch/um/sys-ppc/shared/sysdep/sigcontext.h index f20d965de9c7..b7286f0a1e00 100644 --- a/arch/um/sys-ppc/shared/sysdep/sigcontext.h +++ b/arch/um/sys-ppc/shared/sysdep/sigcontext.h @@ -50,13 +50,3 @@ #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-ppc/shared/sysdep/syscalls.h b/arch/um/sys-ppc/shared/sysdep/syscalls.h index 679df351e19b..1ff81552251c 100644 --- a/arch/um/sys-ppc/shared/sysdep/syscalls.h +++ b/arch/um/sys-ppc/shared/sysdep/syscalls.h @@ -41,13 +41,3 @@ int old_mmap(unsigned long addr, unsigned long len, #define LAST_ARCH_SYSCALL __NR_fadvise64 -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-ppc/sigcontext.c b/arch/um/sys-ppc/sigcontext.c index 4bdc15c89edd..40694d0f3d15 100644 --- a/arch/um/sys-ppc/sigcontext.c +++ b/arch/um/sys-ppc/sigcontext.c @@ -2,13 +2,3 @@ #include "asm/sigcontext.h" #include "sysdep/ptrace.h" -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-x86_64/asm/archparam.h b/arch/um/sys-x86_64/asm/archparam.h index 270ed9586b68..6c083663b8d9 100644 --- a/arch/um/sys-x86_64/asm/archparam.h +++ b/arch/um/sys-x86_64/asm/archparam.h @@ -14,13 +14,3 @@ #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-x86_64/asm/module.h b/arch/um/sys-x86_64/asm/module.h index 35b5491d3e96..8eb79c2d07d5 100644 --- a/arch/um/sys-x86_64/asm/module.h +++ b/arch/um/sys-x86_64/asm/module.h @@ -18,13 +18,3 @@ struct mod_arch_specific #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-x86_64/mem.c b/arch/um/sys-x86_64/mem.c index 3f59a0a4f156..3f8df8abf347 100644 --- a/arch/um/sys-x86_64/mem.c +++ b/arch/um/sys-x86_64/mem.c @@ -14,12 +14,3 @@ unsigned long vm_data_default_flags = __VM_DATA_DEFAULT_FLAGS; unsigned long vm_data_default_flags32 = __VM_DATA_DEFAULT_FLAGS; unsigned long vm_force_exec32 = PROT_EXEC; -/* Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 45161b816313..748e50a1a152 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -165,6 +165,9 @@ config AUDIT_ARCH config ARCH_SUPPORTS_OPTIMIZED_INLINING def_bool y +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + # Use the generic interrupt handling code in kernel/irq/: config GENERIC_HARDIRQS bool diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index fdb45df608b6..a345cb5447a8 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -75,6 +75,7 @@ config DEBUG_STACK_USAGE config DEBUG_PAGEALLOC bool "Debug page memory allocations" depends on DEBUG_KERNEL + depends on ARCH_SUPPORTS_DEBUG_PAGEALLOC ---help--- Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index 8c3c25f35578..5054c2ddd1a0 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -16,24 +17,38 @@ #define SMAP 0x534d4150 /* ASCII "SMAP" */ +struct e820_ext_entry { + struct e820entry std; + u32 ext_flags; +} __attribute__((packed)); + static int detect_memory_e820(void) { int count = 0; u32 next = 0; - u32 size, id; + u32 size, id, edi; u8 err; struct e820entry *desc = boot_params.e820_map; + static struct e820_ext_entry buf; /* static so it is zeroed */ + + /* + * Set this here so that if the BIOS doesn't change this field + * but still doesn't change %ecx, we're still okay... + */ + buf.ext_flags = 1; do { - size = sizeof(struct e820entry); + size = sizeof buf; - /* Important: %edx is clobbered by some BIOSes, - so it must be either used for the error output - or explicitly marked clobbered. */ - asm("int $0x15; setc %0" + /* Important: %edx and %esi are clobbered by some BIOSes, + so they must be either used for the error output + or explicitly marked clobbered. Given that, assume there + is something out there clobbering %ebp and %edi, too. */ + asm("pushl %%ebp; int $0x15; popl %%ebp; setc %0" : "=d" (err), "+b" (next), "=a" (id), "+c" (size), - "=m" (*desc) - : "D" (desc), "d" (SMAP), "a" (0xe820)); + "=D" (edi), "+m" (buf) + : "D" (&buf), "d" (SMAP), "a" (0xe820) + : "esi"); /* BIOSes which terminate the chain with CF = 1 as opposed to %ebx = 0 don't always report the SMAP signature on @@ -51,8 +66,14 @@ static int detect_memory_e820(void) break; } + /* ACPI 3.0 added the extended flags support. If bit 0 + in the extended flags is zero, we're supposed to simply + ignore the entry -- a backwards incompatible change! */ + if (size > 20 && !(buf.ext_flags & 1)) + continue; + + *desc++ = buf.std; count++; - desc++; } while (next && count < ARRAY_SIZE(boot_params.e820_map)); return boot_params.e820_entries = count; diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index a977de23cb4d..a0301bfeb954 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -86,6 +86,9 @@ static inline void early_quirks(void) { } extern void pci_iommu_alloc(void); +/* MSI arch hook */ +#define arch_setup_msi_irqs arch_setup_msi_irqs + #endif /* __KERNEL__ */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index a5074bd0f8be..48dcfa62ea07 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -24,28 +24,4 @@ struct saved_context { unsigned long return_address; } __attribute__((packed)); -#ifdef CONFIG_ACPI -extern unsigned long saved_eip; -extern unsigned long saved_esp; -extern unsigned long saved_ebp; -extern unsigned long saved_ebx; -extern unsigned long saved_esi; -extern unsigned long saved_edi; - -static inline void acpi_save_register_state(unsigned long return_point) -{ - saved_eip = return_point; - asm volatile("movl %%esp,%0" : "=m" (saved_esp)); - asm volatile("movl %%ebp,%0" : "=m" (saved_ebp)); - asm volatile("movl %%ebx,%0" : "=m" (saved_ebx)); - asm volatile("movl %%edi,%0" : "=m" (saved_edi)); - asm volatile("movl %%esi,%0" : "=m" (saved_esi)); -} - -#define acpi_restore_register_state() do {} while (0) - -/* routines for saving/restoring kernel state */ -extern int acpi_save_state_mem(void); -#endif - #endif /* _ASM_X86_SUSPEND_32_H */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index da99ffcdfde6..1bb5c6cee3eb 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3468,6 +3468,10 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) struct intel_iommu *iommu = NULL; int index = 0; + /* x86 doesn't support multiple MSI yet */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + irq_want = nr_irqs_gsi; sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index fbf2f33e3080..5a6aa1c1162f 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -18,6 +18,7 @@ #include <asm/thread_info.h> #include <asm/bootparam.h> #include <asm/elf.h> +#include <asm/suspend.h> #include <xen/interface/xen.h> diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 8793ab33e2c1..e72f062fb4b5 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -16,6 +16,7 @@ #include <asm/thread_info.h> #include <asm/ia32.h> #include <asm/bootparam.h> +#include <asm/suspend.h> #include <xen/interface/xen.h> diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index c7c4776ff630..90f5b9ef5def 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -300,8 +300,7 @@ fs_initcall(pci_iommu_init); static __devinit void via_no_dac(struct pci_dev *dev) { if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO - "PCI: VIA PCI bridge detected. Disabling DAC.\n"); + dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); forbid_dac = 1; } } diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 522db5e3d0bf..5bc5d1688c1c 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -19,49 +19,6 @@ void kunmap(struct page *page) kunmap_high(page); } -static void debug_kmap_atomic_prot(enum km_type type) -{ -#ifdef CONFIG_DEBUG_HIGHMEM - static unsigned warn_count = 10; - - if (unlikely(warn_count == 0)) - return; - - if (unlikely(in_interrupt())) { - if (in_irq()) { - if (type != KM_IRQ0 && type != KM_IRQ1 && - type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ && - type != KM_BOUNCE_READ) { - WARN_ON(1); - warn_count--; - } - } else if (!irqs_disabled()) { /* softirq */ - if (type != KM_IRQ0 && type != KM_IRQ1 && - type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 && - type != KM_SKB_SUNRPC_DATA && - type != KM_SKB_DATA_SOFTIRQ && - type != KM_BOUNCE_READ) { - WARN_ON(1); - warn_count--; - } - } - } - - if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ || - type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) { - if (!irqs_disabled()) { - WARN_ON(1); - warn_count--; - } - } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) { - if (irq_count() == 0 && !irqs_disabled()) { - WARN_ON(1); - warn_count--; - } - } -#endif -} - /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because * no global lock is needed and because the kmap code must perform a global TLB @@ -81,8 +38,9 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) if (!PageHighMem(page)) return page_address(page); - debug_kmap_atomic_prot(type); + debug_kmap_atomic(type); + debug_kmap_atomic(type); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte-idx))); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 699c9b2895ae..bff0c9032f8c 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -19,6 +19,7 @@ #include <asm/iomap.h> #include <asm/pat.h> #include <linux/module.h> +#include <linux/highmem.h> int is_io_mapping_possible(resource_size_t base, unsigned long size) { @@ -71,6 +72,7 @@ iounmap_atomic(void *kvaddr, enum km_type type) unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + debug_kmap_atomic(type); /* * Force other mappings to Oops if they'll try to access this pte * without first remap it. Keeping stale mappings around is a bad idea diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index f6adf2c6d751..aaf26ae58cd5 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -69,11 +69,12 @@ void early_dump_pci_device(u8 bus, u8 slot, u8 func) int j; u32 val; - printk(KERN_INFO "PCI: %02x:%02x:%02x", bus, slot, func); + printk(KERN_INFO "pci 0000:%02x:%02x.%d config space:", + bus, slot, func); for (i = 0; i < 256; i += 4) { if (!(i & 0x0f)) - printk("\n%04x:",i); + printk("\n %02x:",i); val = read_pci_config(bus, slot, func, i); for (j = 0; j < 4; j++) { @@ -96,20 +97,22 @@ void early_dump_pci_devices(void) for (func = 0; func < 8; func++) { u32 class; u8 type; + class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); if (class == 0xffffffff) - break; + continue; early_dump_pci_device(bus, slot, func); - /* No multi-function device? */ - type = read_pci_config_byte(bus, slot, func, + if (func == 0) { + type = read_pci_config_byte(bus, slot, + func, PCI_HEADER_TYPE); - if (!(type & 0x80)) - break; + if (!(type & 0x80)) + break; + } } } } } - diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 9c49919e4d1c..6dd89555fbfa 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -495,26 +495,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, pci_siemens_interrupt_controller); /* - * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have - * 4096 bytes configuration space for each function of their processor - * configuration space. - */ -static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev) -{ - dev->cfg_size = pci_cfg_space_size_ext(dev); -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size); - -/* * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from * confusing the PCI engine: */ diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index f1065b129e9c..4061bb0f267d 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -50,8 +50,6 @@ static int __init pci_legacy_init(void) if (pci_root_bus) pci_bus_add_devices(pci_root_bus); - pcibios_fixup_peer_bridges(); - return 0; } @@ -67,6 +65,7 @@ int __init pci_subsys_init(void) pci_visws_init(); #endif pci_legacy_init(); + pcibios_fixup_peer_bridges(); pcibios_irq_init(); pcibios_init(); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 89bf9242c80a..905bb526b133 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/acpi.h> #include <linux/bitmap.h> +#include <linux/sort.h> #include <asm/e820.h> #include <asm/pci_x86.h> @@ -24,24 +25,49 @@ /* Indicate if the mmcfg resources have been placed into the resource table. */ static int __initdata pci_mmcfg_resources_inserted; +static __init int extend_mmcfg(int num) +{ + struct acpi_mcfg_allocation *new; + int new_num = pci_mmcfg_config_num + num; + + new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL); + if (!new) + return -1; + + if (pci_mmcfg_config) { + memcpy(new, pci_mmcfg_config, + sizeof(pci_mmcfg_config[0]) * new_num); + kfree(pci_mmcfg_config); + } + pci_mmcfg_config = new; + + return 0; +} + +static __init void fill_one_mmcfg(u64 addr, int segment, int start, int end) +{ + int i = pci_mmcfg_config_num; + + pci_mmcfg_config_num++; + pci_mmcfg_config[i].address = addr; + pci_mmcfg_config[i].pci_segment = segment; + pci_mmcfg_config[i].start_bus_number = start; + pci_mmcfg_config[i].end_bus_number = end; +} + static const char __init *pci_mmcfg_e7520(void) { u32 win; raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0xce, 2, &win); win = win & 0xf000; - if(win == 0x0000 || win == 0xf000) - pci_mmcfg_config_num = 0; - else { - pci_mmcfg_config_num = 1; - pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); - if (!pci_mmcfg_config) - return NULL; - pci_mmcfg_config[0].address = win << 16; - pci_mmcfg_config[0].pci_segment = 0; - pci_mmcfg_config[0].start_bus_number = 0; - pci_mmcfg_config[0].end_bus_number = 255; - } + if (win == 0x0000 || win == 0xf000) + return NULL; + + if (extend_mmcfg(1) == -1) + return NULL; + + fill_one_mmcfg(win << 16, 0, 0, 255); return "Intel Corporation E7520 Memory Controller Hub"; } @@ -50,13 +76,11 @@ static const char __init *pci_mmcfg_intel_945(void) { u32 pciexbar, mask = 0, len = 0; - pci_mmcfg_config_num = 1; - raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0x48, 4, &pciexbar); /* Enable bit */ if (!(pciexbar & 1)) - pci_mmcfg_config_num = 0; + return NULL; /* Size bits */ switch ((pciexbar >> 1) & 3) { @@ -73,28 +97,23 @@ static const char __init *pci_mmcfg_intel_945(void) len = 0x04000000U; break; default: - pci_mmcfg_config_num = 0; + return NULL; } /* Errata #2, things break when not aligned on a 256Mb boundary */ /* Can only happen in 64M/128M mode */ if ((pciexbar & mask) & 0x0fffffffU) - pci_mmcfg_config_num = 0; + return NULL; /* Don't hit the APIC registers and their friends */ if ((pciexbar & mask) >= 0xf0000000U) - pci_mmcfg_config_num = 0; - - if (pci_mmcfg_config_num) { - pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); - if (!pci_mmcfg_config) - return NULL; - pci_mmcfg_config[0].address = pciexbar & mask; - pci_mmcfg_config[0].pci_segment = 0; - pci_mmcfg_config[0].start_bus_number = 0; - pci_mmcfg_config[0].end_bus_number = (len >> 20) - 1; - } + return NULL; + + if (extend_mmcfg(1) == -1) + return NULL; + + fill_one_mmcfg(pciexbar & mask, 0, 0, (len >> 20) - 1); return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; } @@ -138,22 +157,77 @@ static const char __init *pci_mmcfg_amd_fam10h(void) busnbits = 8; } - pci_mmcfg_config_num = (1 << segnbits); - pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]) * - pci_mmcfg_config_num, GFP_KERNEL); - if (!pci_mmcfg_config) + if (extend_mmcfg(1 << segnbits) == -1) return NULL; - for (i = 0; i < (1 << segnbits); i++) { - pci_mmcfg_config[i].address = base + (1<<28) * i; - pci_mmcfg_config[i].pci_segment = i; - pci_mmcfg_config[i].start_bus_number = 0; - pci_mmcfg_config[i].end_bus_number = (1 << busnbits) - 1; - } + for (i = 0; i < (1 << segnbits); i++) + fill_one_mmcfg(base + (1<<28) * i, i, 0, (1 << busnbits) - 1); return "AMD Family 10h NB"; } +static bool __initdata mcp55_checked; +static const char __init *pci_mmcfg_nvidia_mcp55(void) +{ + int bus; + int mcp55_mmconf_found = 0; + + static const u32 extcfg_regnum = 0x90; + static const u32 extcfg_regsize = 4; + static const u32 extcfg_enable_mask = 1<<31; + static const u32 extcfg_start_mask = 0xff<<16; + static const int extcfg_start_shift = 16; + static const u32 extcfg_size_mask = 0x3<<28; + static const int extcfg_size_shift = 28; + static const int extcfg_sizebus[] = {0x100, 0x80, 0x40, 0x20}; + static const u32 extcfg_base_mask[] = {0x7ff8, 0x7ffc, 0x7ffe, 0x7fff}; + static const int extcfg_base_lshift = 25; + + /* + * do check if amd fam10h already took over + */ + if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked) + return NULL; + + mcp55_checked = true; + for (bus = 0; bus < 256; bus++) { + u64 base; + u32 l, extcfg; + u16 vendor, device; + int start, size_index, end; + + raw_pci_ops->read(0, bus, PCI_DEVFN(0, 0), 0, 4, &l); + vendor = l & 0xffff; + device = (l >> 16) & 0xffff; + + if (PCI_VENDOR_ID_NVIDIA != vendor || 0x0369 != device) + continue; + + raw_pci_ops->read(0, bus, PCI_DEVFN(0, 0), extcfg_regnum, + extcfg_regsize, &extcfg); + + if (!(extcfg & extcfg_enable_mask)) + continue; + + if (extend_mmcfg(1) == -1) + continue; + + size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift; + base = extcfg & extcfg_base_mask[size_index]; + /* base could > 4G */ + base <<= extcfg_base_lshift; + start = (extcfg & extcfg_start_mask) >> extcfg_start_shift; + end = start + extcfg_sizebus[size_index] - 1; + fill_one_mmcfg(base, 0, start, end); + mcp55_mmconf_found++; + } + + if (!mcp55_mmconf_found) + return NULL; + + return "nVidia MCP55"; +} + struct pci_mmcfg_hostbridge_probe { u32 bus; u32 devfn; @@ -171,8 +245,52 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { 0x1200, pci_mmcfg_amd_fam10h }, { 0xff, PCI_DEVFN(0, 0), PCI_VENDOR_ID_AMD, 0x1200, pci_mmcfg_amd_fam10h }, + { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_NVIDIA, + 0x0369, pci_mmcfg_nvidia_mcp55 }, }; +static int __init cmp_mmcfg(const void *x1, const void *x2) +{ + const typeof(pci_mmcfg_config[0]) *m1 = x1; + const typeof(pci_mmcfg_config[0]) *m2 = x2; + int start1, start2; + + start1 = m1->start_bus_number; + start2 = m2->start_bus_number; + + return start1 - start2; +} + +static void __init pci_mmcfg_check_end_bus_number(void) +{ + int i; + typeof(pci_mmcfg_config[0]) *cfg, *cfgx; + + /* sort them at first */ + sort(pci_mmcfg_config, pci_mmcfg_config_num, + sizeof(pci_mmcfg_config[0]), cmp_mmcfg, NULL); + + /* last one*/ + if (pci_mmcfg_config_num > 0) { + i = pci_mmcfg_config_num - 1; + cfg = &pci_mmcfg_config[i]; + if (cfg->end_bus_number < cfg->start_bus_number) + cfg->end_bus_number = 255; + } + + /* don't overlap please */ + for (i = 0; i < pci_mmcfg_config_num - 1; i++) { + cfg = &pci_mmcfg_config[i]; + cfgx = &pci_mmcfg_config[i+1]; + + if (cfg->end_bus_number < cfg->start_bus_number) + cfg->end_bus_number = 255; + + if (cfg->end_bus_number >= cfgx->start_bus_number) + cfg->end_bus_number = cfgx->start_bus_number - 1; + } +} + static int __init pci_mmcfg_check_hostbridge(void) { u32 l; @@ -186,31 +304,33 @@ static int __init pci_mmcfg_check_hostbridge(void) pci_mmcfg_config_num = 0; pci_mmcfg_config = NULL; - name = NULL; - for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) { + for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) { bus = pci_mmcfg_probes[i].bus; devfn = pci_mmcfg_probes[i].devfn; raw_pci_ops->read(0, bus, devfn, 0, 4, &l); vendor = l & 0xffff; device = (l >> 16) & 0xffff; + name = NULL; if (pci_mmcfg_probes[i].vendor == vendor && pci_mmcfg_probes[i].device == device) name = pci_mmcfg_probes[i].probe(); - } - if (name) { - printk(KERN_INFO "PCI: Found %s %s MMCONFIG support.\n", - name, pci_mmcfg_config_num ? "with" : "without"); + if (name) + printk(KERN_INFO "PCI: Found %s with MMCONFIG support.\n", + name); } - return name != NULL; + /* some end_bus_number is crazy, fix it */ + pci_mmcfg_check_end_bus_number(); + + return pci_mmcfg_config_num != 0; } static void __init pci_mmcfg_insert_resources(void) { -#define PCI_MMCFG_RESOURCE_NAME_LEN 19 +#define PCI_MMCFG_RESOURCE_NAME_LEN 24 int i; struct resource *res; char *names; @@ -228,9 +348,10 @@ static void __init pci_mmcfg_insert_resources(void) struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i]; num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; res->name = names; - snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u", - cfg->pci_segment); - res->start = cfg->address; + snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, + "PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment, + cfg->start_bus_number, cfg->end_bus_number); + res->start = cfg->address + (cfg->start_bus_number << 20); res->end = res->start + (num_buses << 20) - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; insert_resource(&iomem_resource, res); @@ -354,8 +475,6 @@ static void __init pci_mmcfg_reject_broken(int early) (pci_mmcfg_config[0].address == 0)) return; - cfg = &pci_mmcfg_config[0]; - for (i = 0; i < pci_mmcfg_config_num; i++) { int valid = 0; u64 addr, size; @@ -423,10 +542,10 @@ static void __init __pci_mmcfg_init(int early) known_bridge = 1; } - if (!known_bridge) { + if (!known_bridge) acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); - pci_mmcfg_reject_broken(early); - } + + pci_mmcfg_reject_broken(early); if ((pci_mmcfg_config_num == 0) || (pci_mmcfg_config == NULL) || diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 30007ffc8e11..94349f8b2f96 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -112,13 +112,18 @@ static struct pci_raw_ops pci_mmcfg = { static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg) { void __iomem *addr; - u32 size; - - size = (cfg->end_bus_number + 1) << 20; - addr = ioremap_nocache(cfg->address, size); + u64 start, size; + + start = cfg->start_bus_number; + start <<= 20; + start += cfg->address; + size = cfg->end_bus_number + 1 - cfg->start_bus_number; + size <<= 20; + addr = ioremap_nocache(start, size); if (addr) { printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n", - cfg->address, cfg->address + size - 1); + start, start + size - 1); + addr -= cfg->start_bus_number << 20; } return addr; } @@ -157,7 +162,7 @@ void __init pci_mmcfg_arch_free(void) for (i = 0; i < pci_mmcfg_config_num; ++i) { if (pci_mmcfg_virt[i].virt) { - iounmap(pci_mmcfg_virt[i].virt); + iounmap(pci_mmcfg_virt[i].virt + (pci_mmcfg_virt[i].cfg->start_bus_number << 20)); pci_mmcfg_virt[i].virt = NULL; pci_mmcfg_virt[i].cfg = NULL; } diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c index 274d06082f48..ce702c5b3a2c 100644 --- a/arch/x86/power/cpu_32.c +++ b/arch/x86/power/cpu_32.c @@ -12,6 +12,7 @@ #include <asm/mtrr.h> #include <asm/mce.h> #include <asm/xcr.h> +#include <asm/suspend.h> static struct saved_context saved_context; diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c index e3b6cf70d62c..5343540f2607 100644 --- a/arch/x86/power/cpu_64.c +++ b/arch/x86/power/cpu_64.c @@ -15,6 +15,7 @@ #include <asm/pgtable.h> #include <asm/mtrr.h> #include <asm/xcr.h> +#include <asm/suspend.h> static void fix_processor_context(void); diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 6dd000dd7933..65fdc86e923f 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -14,6 +14,7 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/mtrr.h> +#include <asm/suspend.h> /* References to section boundaries */ extern const void __nosave_begin, __nosave_end; diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index 25d46c84eb08..4c559cf7da2d 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -18,6 +18,7 @@ #include <linux/mm.h> #include <linux/major.h> #include <linux/param.h> +#include <linux/seq_file.h> #include <linux/serial.h> #include <linux/serialP.h> @@ -176,22 +177,24 @@ static void rs_wait_until_sent(struct tty_struct *tty, int timeout) /* Stub, once again.. */ } -static int rs_read_proc(char *page, char **start, off_t off, int count, - int *eof, void *data) +static int rs_proc_show(struct seq_file *m, void *v) { - int len = 0; - off_t begin = 0; - - len += sprintf(page, "serinfo:1.0 driver:%s\n", serial_version); - *eof = 1; - - if (off >= len + begin) - return 0; + seq_printf(m, "serinfo:1.0 driver:%s\n", serial_version); + return 0; +} - *start = page + (off - begin); - return ((count < begin + len - off) ? count : begin + len - off); +static int rs_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, rs_proc_show, NULL); } +static const struct file_operations rs_proc_fops = { + .owner = THIS_MODULE, + .open = rs_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; static struct tty_operations serial_ops = { .open = rs_open, @@ -203,7 +206,7 @@ static struct tty_operations serial_ops = { .chars_in_buffer = rs_chars_in_buffer, .hangup = rs_hangup, .wait_until_sent = rs_wait_until_sent, - .read_proc = rs_read_proc + .proc_fops = &rs_proc_fops, }; int __init rs_init(void) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 5b38a026d122..196f97d00956 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -66,11 +66,18 @@ struct acpi_pci_root { struct acpi_device * device; struct acpi_pci_id id; struct pci_bus *bus; + + u32 osc_support_set; /* _OSC state of support bits */ + u32 osc_control_set; /* _OSC state of control bits */ + u32 osc_control_qry; /* the latest _OSC query result */ + + u32 osc_queried:1; /* has _OSC control been queried? */ }; static LIST_HEAD(acpi_pci_roots); static struct acpi_pci_driver *sub_driver; +static DEFINE_MUTEX(osc_lock); int acpi_pci_register_driver(struct acpi_pci_driver *driver) { @@ -185,6 +192,175 @@ static void acpi_pci_bridge_scan(struct acpi_device *device) } } +static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, + 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; + +static acpi_status acpi_pci_run_osc(acpi_handle handle, + const u32 *capbuf, u32 *retval) +{ + acpi_status status; + struct acpi_object_list input; + union acpi_object in_params[4]; + struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; + union acpi_object *out_obj; + u32 errors; + + /* Setting up input parameters */ + input.count = 4; + input.pointer = in_params; + in_params[0].type = ACPI_TYPE_BUFFER; + in_params[0].buffer.length = 16; + in_params[0].buffer.pointer = OSC_UUID; + in_params[1].type = ACPI_TYPE_INTEGER; + in_params[1].integer.value = 1; + in_params[2].type = ACPI_TYPE_INTEGER; + in_params[2].integer.value = 3; + in_params[3].type = ACPI_TYPE_BUFFER; + in_params[3].buffer.length = 12; + in_params[3].buffer.pointer = (u8 *)capbuf; + + status = acpi_evaluate_object(handle, "_OSC", &input, &output); + if (ACPI_FAILURE(status)) + return status; + + if (!output.length) + return AE_NULL_OBJECT; + + out_obj = output.pointer; + if (out_obj->type != ACPI_TYPE_BUFFER) { + printk(KERN_DEBUG "_OSC evaluation returned wrong type\n"); + status = AE_TYPE; + goto out_kfree; + } + /* Need to ignore the bit0 in result code */ + errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); + if (errors) { + if (errors & OSC_REQUEST_ERROR) + printk(KERN_DEBUG "_OSC request failed\n"); + if (errors & OSC_INVALID_UUID_ERROR) + printk(KERN_DEBUG "_OSC invalid UUID\n"); + if (errors & OSC_INVALID_REVISION_ERROR) + printk(KERN_DEBUG "_OSC invalid revision\n"); + if (errors & OSC_CAPABILITIES_MASK_ERROR) { + if (capbuf[OSC_QUERY_TYPE] & OSC_QUERY_ENABLE) + goto out_success; + printk(KERN_DEBUG + "Firmware did not grant requested _OSC control\n"); + status = AE_SUPPORT; + goto out_kfree; + } + status = AE_ERROR; + goto out_kfree; + } +out_success: + *retval = *((u32 *)(out_obj->buffer.pointer + 8)); + status = AE_OK; + +out_kfree: + kfree(output.pointer); + return status; +} + +static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 flags) +{ + acpi_status status; + u32 support_set, result, capbuf[3]; + + /* do _OSC query for all possible controls */ + support_set = root->osc_support_set | (flags & OSC_SUPPORT_MASKS); + capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; + capbuf[OSC_SUPPORT_TYPE] = support_set; + capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; + + status = acpi_pci_run_osc(root->device->handle, capbuf, &result); + if (ACPI_SUCCESS(status)) { + root->osc_support_set = support_set; + root->osc_control_qry = result; + root->osc_queried = 1; + } + return status; +} + +static acpi_status acpi_pci_osc_support(struct acpi_pci_root *root, u32 flags) +{ + acpi_status status; + acpi_handle tmp; + + status = acpi_get_handle(root->device->handle, "_OSC", &tmp); + if (ACPI_FAILURE(status)) + return status; + mutex_lock(&osc_lock); + status = acpi_pci_query_osc(root, flags); + mutex_unlock(&osc_lock); + return status; +} + +static struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle) +{ + struct acpi_pci_root *root; + list_for_each_entry(root, &acpi_pci_roots, node) { + if (root->device->handle == handle) + return root; + } + return NULL; +} + +/** + * acpi_pci_osc_control_set - commit requested control to Firmware + * @handle: acpi_handle for the target ACPI object + * @flags: driver's requested control bits + * + * Attempt to take control from Firmware on requested control bits. + **/ +acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags) +{ + acpi_status status; + u32 control_req, result, capbuf[3]; + acpi_handle tmp; + struct acpi_pci_root *root; + + status = acpi_get_handle(handle, "_OSC", &tmp); + if (ACPI_FAILURE(status)) + return status; + + control_req = (flags & OSC_CONTROL_MASKS); + if (!control_req) + return AE_TYPE; + + root = acpi_pci_find_root(handle); + if (!root) + return AE_NOT_EXIST; + + mutex_lock(&osc_lock); + /* No need to evaluate _OSC if the control was already granted. */ + if ((root->osc_control_set & control_req) == control_req) + goto out; + + /* Need to query controls first before requesting them */ + if (!root->osc_queried) { + status = acpi_pci_query_osc(root, root->osc_support_set); + if (ACPI_FAILURE(status)) + goto out; + } + if ((root->osc_control_qry & control_req) != control_req) { + printk(KERN_DEBUG + "Firmware did not grant requested _OSC control\n"); + status = AE_SUPPORT; + goto out; + } + + capbuf[OSC_QUERY_TYPE] = 0; + capbuf[OSC_SUPPORT_TYPE] = root->osc_support_set; + capbuf[OSC_CONTROL_TYPE] = root->osc_control_set | control_req; + status = acpi_pci_run_osc(handle, capbuf, &result); + if (ACPI_SUCCESS(status)) + root->osc_control_set = result; +out: + mutex_unlock(&osc_lock); + return status; +} +EXPORT_SYMBOL(acpi_pci_osc_control_set); + static int __devinit acpi_pci_root_add(struct acpi_device *device) { int result = 0; @@ -217,7 +393,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) * PCI domains, so we indicate this in _OSC support capabilities. */ flags = base_flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT; - pci_acpi_osc_support(device->handle, flags); + acpi_pci_osc_support(root, flags); /* * Segment @@ -353,7 +529,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) if (pci_msi_enabled()) flags |= OSC_MSI_SUPPORT; if (flags != base_flags) - pci_acpi_osc_support(device->handle, flags); + acpi_pci_osc_support(root, flags); end: if (result) { diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index 14b9d5f4c203..c07e725ea93d 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -6,7 +6,6 @@ # menuconfig AUXDISPLAY - depends on PARPORT bool "Auxiliary Display support" ---help--- Say Y here to get to see options for auxiliary display drivers. @@ -14,7 +13,7 @@ menuconfig AUXDISPLAY If you say N, all options in this submenu will be skipped and disabled. -if AUXDISPLAY && PARPORT +if AUXDISPLAY config KS0108 tristate "KS0108 LCD Controller" diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 2621ed2ce6d2..40b17d3b55a1 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1192,6 +1192,30 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { return err; } +static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) +{ + int err; + sector_t sec; + loff_t sz; + + err = -ENXIO; + if (unlikely(lo->lo_state != Lo_bound)) + goto out; + err = figure_loop_size(lo); + if (unlikely(err)) + goto out; + sec = get_capacity(lo->lo_disk); + /* the width of sector_t may be narrow for bit-shift */ + sz = sec; + sz <<= 9; + mutex_lock(&bdev->bd_mutex); + bd_set_size(bdev, sz); + mutex_unlock(&bdev->bd_mutex); + + out: + return err; +} + static int lo_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -1224,6 +1248,11 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, case LOOP_GET_STATUS64: err = loop_get_status64(lo, (struct loop_info64 __user *) arg); break; + case LOOP_SET_CAPACITY: + err = -EPERM; + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + err = loop_set_capacity(lo, bdev); + break; default: err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; } @@ -1371,6 +1400,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, lo, (struct compat_loop_info __user *) arg); mutex_unlock(&lo->lo_ctl_mutex); break; + case LOOP_SET_CAPACITY: case LOOP_CLR_FD: case LOOP_GET_STATUS64: case LOOP_SET_STATUS64: diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c index a58869ea8513..fd3ebd1be570 100644 --- a/drivers/char/amiserial.c +++ b/drivers/char/amiserial.c @@ -79,6 +79,7 @@ static char *serial_version = "4.30"; #include <linux/ptrace.h> #include <linux/ioport.h> #include <linux/mm.h> +#include <linux/seq_file.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/bitops.h> @@ -1825,14 +1826,13 @@ static int rs_open(struct tty_struct *tty, struct file * filp) * /proc fs routines.... */ -static inline int line_info(char *buf, struct serial_state *state) +static inline void line_info(struct seq_file *m, struct serial_state *state) { struct async_struct *info = state->info, scr_info; char stat_buf[30], control, status; - int ret; unsigned long flags; - ret = sprintf(buf, "%d: uart:amiga_builtin",state->line); + seq_printf(m, "%d: uart:amiga_builtin",state->line); /* * Figure out the current RS-232 lines @@ -1864,55 +1864,49 @@ static inline int line_info(char *buf, struct serial_state *state) strcat(stat_buf, "|CD"); if (info->quot) { - ret += sprintf(buf+ret, " baud:%d", - state->baud_base / info->quot); + seq_printf(m, " baud:%d", state->baud_base / info->quot); } - ret += sprintf(buf+ret, " tx:%d rx:%d", - state->icount.tx, state->icount.rx); + seq_printf(m, " tx:%d rx:%d", state->icount.tx, state->icount.rx); if (state->icount.frame) - ret += sprintf(buf+ret, " fe:%d", state->icount.frame); + seq_printf(m, " fe:%d", state->icount.frame); if (state->icount.parity) - ret += sprintf(buf+ret, " pe:%d", state->icount.parity); + seq_printf(m, " pe:%d", state->icount.parity); if (state->icount.brk) - ret += sprintf(buf+ret, " brk:%d", state->icount.brk); + seq_printf(m, " brk:%d", state->icount.brk); if (state->icount.overrun) - ret += sprintf(buf+ret, " oe:%d", state->icount.overrun); + seq_printf(m, " oe:%d", state->icount.overrun); /* * Last thing is the RS-232 status lines */ - ret += sprintf(buf+ret, " %s\n", stat_buf+1); - return ret; + seq_printf(m, " %s\n", stat_buf+1); } -static int rs_read_proc(char *page, char **start, off_t off, int count, - int *eof, void *data) +static int rs_proc_show(struct seq_file *m, void *v) { - int len = 0, l; - off_t begin = 0; - - len += sprintf(page, "serinfo:1.0 driver:%s\n", serial_version); - l = line_info(page + len, &rs_table[0]); - len += l; - if (len+begin > off+count) - goto done; - if (len+begin < off) { - begin += len; - len = 0; - } - *eof = 1; -done: - if (off >= len+begin) - return 0; - *start = page + (off-begin); - return ((count < begin+len-off) ? count : begin+len-off); + seq_printf(m, "serinfo:1.0 driver:%s\n", serial_version); + line_info(m, &rs_table[0]); + return 0; } +static int rs_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, rs_proc_show, NULL); +} + +static const struct file_operations rs_proc_fops = { + .owner = THIS_MODULE, + .open = rs_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* * --------------------------------------------------------------------- * rs_init() and friends @@ -1951,9 +1945,9 @@ static const struct tty_operations serial_ops = { .break_ctl = rs_break, .send_xchar = rs_send_xchar, .wait_until_sent = rs_wait_until_sent, - .read_proc = rs_read_proc, .tiocmget = rs_tiocmget, .tiocmset = rs_tiocmset, + .proc_fops = &rs_proc_fops, }; /* diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c index 6a59f72a9c21..272db0e2b491 100644 --- a/drivers/char/cyclades.c +++ b/drivers/char/cyclades.c @@ -657,6 +657,7 @@ #include <linux/stat.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> static void cy_throttle(struct tty_struct *tty); static void cy_send_xchar(struct tty_struct *tty, char ch); @@ -868,8 +869,6 @@ static int cyz_issue_cmd(struct cyclades_card *, __u32, __u8, __u32); static unsigned detect_isa_irq(void __iomem *); #endif /* CONFIG_ISA */ -static int cyclades_get_proc_info(char *, char **, off_t, int, int *, void *); - #ifndef CONFIG_CYZ_INTR static void cyz_poll(unsigned long); @@ -5216,31 +5215,22 @@ static struct pci_driver cy_pci_driver = { }; #endif -static int -cyclades_get_proc_info(char *buf, char **start, off_t offset, int length, - int *eof, void *data) +static int cyclades_proc_show(struct seq_file *m, void *v) { struct cyclades_port *info; unsigned int i, j; - int len = 0; - off_t begin = 0; - off_t pos = 0; - int size; __u32 cur_jifs = jiffies; - size = sprintf(buf, "Dev TimeOpen BytesOut IdleOut BytesIn " + seq_puts(m, "Dev TimeOpen BytesOut IdleOut BytesIn " "IdleIn Overruns Ldisc\n"); - pos += size; - len += size; - /* Output one line for each known port */ for (i = 0; i < NR_CARDS; i++) for (j = 0; j < cy_card[i].nports; j++) { info = &cy_card[i].ports[j]; if (info->port.count) - size = sprintf(buf + len, "%3d %8lu %10lu %8lu " + seq_printf(m, "%3d %8lu %10lu %8lu " "%10lu %8lu %9lu %6ld\n", info->line, (cur_jifs - info->idle_stats.in_use) / HZ, info->idle_stats.xmit_bytes, @@ -5251,30 +5241,26 @@ cyclades_get_proc_info(char *buf, char **start, off_t offset, int length, /* FIXME: double check locking */ (long)info->port.tty->ldisc.ops->num); else - size = sprintf(buf + len, "%3d %8lu %10lu %8lu " + seq_printf(m, "%3d %8lu %10lu %8lu " "%10lu %8lu %9lu %6ld\n", info->line, 0L, 0L, 0L, 0L, 0L, 0L, 0L); - len += size; - pos = begin + len; - - if (pos < offset) { - len = 0; - begin = pos; - } - if (pos > offset + length) - goto done; } - *eof = 1; -done: - *start = buf + (offset - begin); /* Start of wanted data */ - len -= (offset - begin); /* Start slop */ - if (len > length) - len = length; /* Ending slop */ - if (len < 0) - len = 0; - return len; + return 0; +} + +static int cyclades_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, cyclades_proc_show, NULL); } +static const struct file_operations cyclades_proc_fops = { + .owner = THIS_MODULE, + .open = cyclades_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* The serial driver boot-time initialization code! Hardware I/O ports are mapped to character special devices on a first found, first allocated manner. That is, this code searches @@ -5311,9 +5297,9 @@ static const struct tty_operations cy_ops = { .hangup = cy_hangup, .break_ctl = cy_break, .wait_until_sent = cy_wait_until_sent, - .read_proc = cyclades_get_proc_info, .tiocmget = cy_tiocmget, .tiocmset = cy_tiocmset, + .proc_fops = &cyclades_proc_fops, }; static int __init cy_init(void) diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c index 70e0ebc30bd0..afd9247cf082 100644 --- a/drivers/char/ip2/ip2main.c +++ b/drivers/char/ip2/ip2main.c @@ -139,7 +139,7 @@ #include <linux/seq_file.h> static const struct file_operations ip2mem_proc_fops; -static int ip2_read_proc(char *, char **, off_t, int, int *, void * ); +static const struct file_operations ip2_proc_fops; /********************/ /* Type Definitions */ @@ -446,9 +446,9 @@ static const struct tty_operations ip2_ops = { .stop = ip2_stop, .start = ip2_start, .hangup = ip2_hangup, - .read_proc = ip2_read_proc, .tiocmget = ip2_tiocmget, .tiocmset = ip2_tiocmset, + .proc_fops = &ip2_proc_fops, }; /******************************************************************************/ @@ -3029,19 +3029,17 @@ static const struct file_operations ip2mem_proc_fops = { * different sources including ip2mkdev.c and a couple of other drivers. * The bugs are all mine. :-) =mhw= */ -static int ip2_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int ip2_proc_show(struct seq_file *m, void *v) { int i, j, box; - int len = 0; int boxes = 0; int ports = 0; int tports = 0; - off_t begin = 0; i2eBordStrPtr pB; + char *sep; - len += sprintf(page, "ip2info: 1.0 driver: %s\n", pcVersion ); - len += sprintf(page+len, "Driver: SMajor=%d CMajor=%d IMajor=%d MaxBoards=%d MaxBoxes=%d MaxPorts=%d\n", + seq_printf(m, "ip2info: 1.0 driver: %s\n", pcVersion); + seq_printf(m, "Driver: SMajor=%d CMajor=%d IMajor=%d MaxBoards=%d MaxBoxes=%d MaxPorts=%d\n", IP2_TTY_MAJOR, IP2_CALLOUT_MAJOR, IP2_IPL_MAJOR, IP2_MAX_BOARDS, ABS_MAX_BOXES, ABS_BIGGEST_BOX); @@ -3053,7 +3051,8 @@ static int ip2_read_proc(char *page, char **start, off_t off, switch( pB->i2ePom.e.porID & ~POR_ID_RESERVED ) { case POR_ID_FIIEX: - len += sprintf( page+len, "Board %d: EX ports=", i ); + seq_printf(m, "Board %d: EX ports=", i); + sep = ""; for( box = 0; box < ABS_MAX_BOXES; ++box ) { ports = 0; @@ -3065,79 +3064,74 @@ static int ip2_read_proc(char *page, char **start, off_t off, ++ports; } } - len += sprintf( page+len, "%d,", ports ); + seq_printf(m, "%s%d", sep, ports); + sep = ","; tports += ports; } - - --len; /* Backup over that last comma */ - - len += sprintf( page+len, " boxes=%d width=%d", boxes, pB->i2eDataWidth16 ? 16 : 8 ); + seq_printf(m, " boxes=%d width=%d", boxes, pB->i2eDataWidth16 ? 16 : 8); break; case POR_ID_II_4: - len += sprintf(page+len, "Board %d: ISA-4 ports=4 boxes=1", i ); + seq_printf(m, "Board %d: ISA-4 ports=4 boxes=1", i); tports = ports = 4; break; case POR_ID_II_8: - len += sprintf(page+len, "Board %d: ISA-8-std ports=8 boxes=1", i ); + seq_printf(m, "Board %d: ISA-8-std ports=8 boxes=1", i); tports = ports = 8; break; case POR_ID_II_8R: - len += sprintf(page+len, "Board %d: ISA-8-RJ11 ports=8 boxes=1", i ); + seq_printf(m, "Board %d: ISA-8-RJ11 ports=8 boxes=1", i); tports = ports = 8; break; default: - len += sprintf(page+len, "Board %d: unknown", i ); + seq_printf(m, "Board %d: unknown", i); /* Don't try and probe for minor numbers */ tports = ports = 0; } } else { /* Don't try and probe for minor numbers */ - len += sprintf(page+len, "Board %d: vacant", i ); + seq_printf(m, "Board %d: vacant", i); tports = ports = 0; } if( tports ) { - len += sprintf(page+len, " minors=" ); - + seq_puts(m, " minors="); + sep = ""; for ( box = 0; box < ABS_MAX_BOXES; ++box ) { for ( j = 0; j < ABS_BIGGEST_BOX; ++j ) { if ( pB->i2eChannelMap[box] & (1 << j) ) { - len += sprintf (page+len,"%d,", + seq_printf(m, "%s%d", sep, j + ABS_BIGGEST_BOX * (box+i*ABS_MAX_BOXES)); + sep = ","; } } } - - page[ len - 1 ] = '\n'; /* Overwrite that last comma */ - } else { - len += sprintf (page+len,"\n" ); - } - - if (len+begin > off+count) - break; - if (len+begin < off) { - begin += len; - len = 0; } + seq_putc(m, '\n'); } + return 0; + } - if (i >= IP2_MAX_BOARDS) - *eof = 1; - if (off >= len+begin) - return 0; +static int ip2_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ip2_proc_show, NULL); +} - *start = page + (off-begin); - return ((count < begin+len-off) ? count : begin+len-off); - } +static const struct file_operations ip2_proc_fops = { + .owner = THIS_MODULE, + .open = ip2_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; /******************************************************************************/ /* Function: ip2trace() */ diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index 5c3dc6b8411c..fff19f7e29d2 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -24,6 +24,7 @@ #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/serial.h> +#include <linux/seq_file.h> #include <linux/cdk.h> #include <linux/comstats.h> #include <linux/istallion.h> @@ -613,7 +614,6 @@ static int stli_breakctl(struct tty_struct *tty, int state); static void stli_waituntilsent(struct tty_struct *tty, int timeout); static void stli_sendxchar(struct tty_struct *tty, char ch); static void stli_hangup(struct tty_struct *tty); -static int stli_portinfo(struct stlibrd *brdp, struct stliport *portp, int portnr, char *pos); static int stli_brdinit(struct stlibrd *brdp); static int stli_startbrd(struct stlibrd *brdp); @@ -1893,20 +1893,10 @@ static void stli_sendxchar(struct tty_struct *tty, char ch) stli_cmdwait(brdp, portp, A_PORTCTRL, &actrl, sizeof(asyctrl_t), 0); } -/*****************************************************************************/ - -#define MAXLINE 80 - -/* - * Format info for a specified port. The line is deliberately limited - * to 80 characters. (If it is too long it will be truncated, if too - * short then padded with spaces). - */ - -static int stli_portinfo(struct stlibrd *brdp, struct stliport *portp, int portnr, char *pos) +static void stli_portinfo(struct seq_file *m, struct stlibrd *brdp, struct stliport *portp, int portnr) { - char *sp, *uart; - int rc, cnt; + char *uart; + int rc; rc = stli_portcmdstats(NULL, portp); @@ -1918,44 +1908,50 @@ static int stli_portinfo(struct stlibrd *brdp, struct stliport *portp, int portn default:uart = "CD1400"; break; } } - - sp = pos; - sp += sprintf(sp, "%d: uart:%s ", portnr, uart); + seq_printf(m, "%d: uart:%s ", portnr, uart); if ((brdp->state & BST_STARTED) && (rc >= 0)) { - sp += sprintf(sp, "tx:%d rx:%d", (int) stli_comstats.txtotal, + char sep; + + seq_printf(m, "tx:%d rx:%d", (int) stli_comstats.txtotal, (int) stli_comstats.rxtotal); if (stli_comstats.rxframing) - sp += sprintf(sp, " fe:%d", + seq_printf(m, " fe:%d", (int) stli_comstats.rxframing); if (stli_comstats.rxparity) - sp += sprintf(sp, " pe:%d", + seq_printf(m, " pe:%d", (int) stli_comstats.rxparity); if (stli_comstats.rxbreaks) - sp += sprintf(sp, " brk:%d", + seq_printf(m, " brk:%d", (int) stli_comstats.rxbreaks); if (stli_comstats.rxoverrun) - sp += sprintf(sp, " oe:%d", + seq_printf(m, " oe:%d", (int) stli_comstats.rxoverrun); - cnt = sprintf(sp, "%s%s%s%s%s ", - (stli_comstats.signals & TIOCM_RTS) ? "|RTS" : "", - (stli_comstats.signals & TIOCM_CTS) ? "|CTS" : "", - (stli_comstats.signals & TIOCM_DTR) ? "|DTR" : "", - (stli_comstats.signals & TIOCM_CD) ? "|DCD" : "", - (stli_comstats.signals & TIOCM_DSR) ? "|DSR" : ""); - *sp = ' '; - sp += cnt; + sep = ' '; + if (stli_comstats.signals & TIOCM_RTS) { + seq_printf(m, "%c%s", sep, "RTS"); + sep = '|'; + } + if (stli_comstats.signals & TIOCM_CTS) { + seq_printf(m, "%c%s", sep, "CTS"); + sep = '|'; + } + if (stli_comstats.signals & TIOCM_DTR) { + seq_printf(m, "%c%s", sep, "DTR"); + sep = '|'; + } + if (stli_comstats.signals & TIOCM_CD) { + seq_printf(m, "%c%s", sep, "DCD"); + sep = '|'; + } + if (stli_comstats.signals & TIOCM_DSR) { + seq_printf(m, "%c%s", sep, "DSR"); + sep = '|'; + } } - - for (cnt = (sp - pos); (cnt < (MAXLINE - 1)); cnt++) - *sp++ = ' '; - if (cnt >= MAXLINE) - pos[(MAXLINE - 2)] = '+'; - pos[(MAXLINE - 1)] = '\n'; - - return(MAXLINE); + seq_putc(m, '\n'); } /*****************************************************************************/ @@ -1964,26 +1960,15 @@ static int stli_portinfo(struct stlibrd *brdp, struct stliport *portp, int portn * Port info, read from the /proc file system. */ -static int stli_readproc(char *page, char **start, off_t off, int count, int *eof, void *data) +static int stli_proc_show(struct seq_file *m, void *v) { struct stlibrd *brdp; struct stliport *portp; unsigned int brdnr, portnr, totalport; - int curoff, maxoff; - char *pos; - pos = page; totalport = 0; - curoff = 0; - - if (off == 0) { - pos += sprintf(pos, "%s: version %s", stli_drvtitle, - stli_drvversion); - while (pos < (page + MAXLINE - 1)) - *pos++ = ' '; - *pos++ = '\n'; - } - curoff = MAXLINE; + + seq_printf(m, "%s: version %s\n", stli_drvtitle, stli_drvversion); /* * We scan through for each board, panel and port. The offset is @@ -1996,33 +1981,31 @@ static int stli_readproc(char *page, char **start, off_t off, int count, int *eo if (brdp->state == 0) continue; - maxoff = curoff + (brdp->nrports * MAXLINE); - if (off >= maxoff) { - curoff = maxoff; - continue; - } - totalport = brdnr * STL_MAXPORTS; for (portnr = 0; (portnr < brdp->nrports); portnr++, totalport++) { portp = brdp->ports[portnr]; if (portp == NULL) continue; - if (off >= (curoff += MAXLINE)) - continue; - if ((pos - page + MAXLINE) > count) - goto stli_readdone; - pos += stli_portinfo(brdp, portp, totalport, pos); + stli_portinfo(m, brdp, portp, totalport); } } + return 0; +} - *eof = 1; - -stli_readdone: - *start = page; - return(pos - page); +static int stli_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, stli_proc_show, NULL); } +static const struct file_operations stli_proc_fops = { + .owner = THIS_MODULE, + .open = stli_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /*****************************************************************************/ /* @@ -4427,9 +4410,9 @@ static const struct tty_operations stli_ops = { .break_ctl = stli_breakctl, .wait_until_sent = stli_waituntilsent, .send_xchar = stli_sendxchar, - .read_proc = stli_readproc, .tiocmget = stli_tiocmget, .tiocmset = stli_tiocmset, + .proc_fops = &stli_proc_fops, }; static const struct tty_port_operations stli_port_ops = { diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 5608a1e5a3b3..19d79fc54461 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -51,6 +51,7 @@ #include <linux/ptrace.h> #include <linux/ioport.h> #include <linux/mm.h> +#include <linux/seq_file.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/vmalloc.h> @@ -2619,13 +2620,12 @@ cleanup: * /proc fs routines.... */ -static inline int line_info(char *buf, MGSLPC_INFO *info) +static inline void line_info(struct seq_file *m, MGSLPC_INFO *info) { char stat_buf[30]; - int ret; unsigned long flags; - ret = sprintf(buf, "%s:io:%04X irq:%d", + seq_printf(m, "%s:io:%04X irq:%d", info->device_name, info->io_base, info->irq_level); /* output current serial signal states */ @@ -2649,75 +2649,70 @@ static inline int line_info(char *buf, MGSLPC_INFO *info) strcat(stat_buf, "|RI"); if (info->params.mode == MGSL_MODE_HDLC) { - ret += sprintf(buf+ret, " HDLC txok:%d rxok:%d", + seq_printf(m, " HDLC txok:%d rxok:%d", info->icount.txok, info->icount.rxok); if (info->icount.txunder) - ret += sprintf(buf+ret, " txunder:%d", info->icount.txunder); + seq_printf(m, " txunder:%d", info->icount.txunder); if (info->icount.txabort) - ret += sprintf(buf+ret, " txabort:%d", info->icount.txabort); + seq_printf(m, " txabort:%d", info->icount.txabort); if (info->icount.rxshort) - ret += sprintf(buf+ret, " rxshort:%d", info->icount.rxshort); + seq_printf(m, " rxshort:%d", info->icount.rxshort); if (info->icount.rxlong) - ret += sprintf(buf+ret, " rxlong:%d", info->icount.rxlong); + seq_printf(m, " rxlong:%d", info->icount.rxlong); if (info->icount.rxover) - ret += sprintf(buf+ret, " rxover:%d", info->icount.rxover); + seq_printf(m, " rxover:%d", info->icount.rxover); if (info->icount.rxcrc) - ret += sprintf(buf+ret, " rxcrc:%d", info->icount.rxcrc); + seq_printf(m, " rxcrc:%d", info->icount.rxcrc); } else { - ret += sprintf(buf+ret, " ASYNC tx:%d rx:%d", + seq_printf(m, " ASYNC tx:%d rx:%d", info->icount.tx, info->icount.rx); if (info->icount.frame) - ret += sprintf(buf+ret, " fe:%d", info->icount.frame); + seq_printf(m, " fe:%d", info->icount.frame); if (info->icount.parity) - ret += sprintf(buf+ret, " pe:%d", info->icount.parity); + seq_printf(m, " pe:%d", info->icount.parity); if (info->icount.brk) - ret += sprintf(buf+ret, " brk:%d", info->icount.brk); + seq_printf(m, " brk:%d", info->icount.brk); if (info->icount.overrun) - ret += sprintf(buf+ret, " oe:%d", info->icount.overrun); + seq_printf(m, " oe:%d", info->icount.overrun); } /* Append serial signal status to end */ - ret += sprintf(buf+ret, " %s\n", stat_buf+1); + seq_printf(m, " %s\n", stat_buf+1); - ret += sprintf(buf+ret, "txactive=%d bh_req=%d bh_run=%d pending_bh=%x\n", + seq_printf(m, "txactive=%d bh_req=%d bh_run=%d pending_bh=%x\n", info->tx_active,info->bh_requested,info->bh_running, info->pending_bh); - - return ret; } /* Called to print information about devices */ -static int mgslpc_read_proc(char *page, char **start, off_t off, int count, - int *eof, void *data) +static int mgslpc_proc_show(struct seq_file *m, void *v) { - int len = 0, l; - off_t begin = 0; MGSLPC_INFO *info; - len += sprintf(page, "synclink driver:%s\n", driver_version); + seq_printf(m, "synclink driver:%s\n", driver_version); info = mgslpc_device_list; while( info ) { - l = line_info(page + len, info); - len += l; - if (len+begin > off+count) - goto done; - if (len+begin < off) { - begin += len; - len = 0; - } + line_info(m, info); info = info->next_device; } + return 0; +} - *eof = 1; -done: - if (off >= len+begin) - return 0; - *start = page + (off-begin); - return ((count < begin+len-off) ? count : begin+len-off); +static int mgslpc_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, mgslpc_proc_show, NULL); } +static const struct file_operations mgslpc_proc_fops = { + .owner = THIS_MODULE, + .open = mgslpc_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int rx_alloc_buffers(MGSLPC_INFO *info) { /* each buffer has header and data */ @@ -2861,13 +2856,13 @@ static const struct tty_operations mgslpc_ops = { .send_xchar = mgslpc_send_xchar, .break_ctl = mgslpc_break, .wait_until_sent = mgslpc_wait_until_sent, - .read_proc = mgslpc_read_proc, .set_termios = mgslpc_set_termios, .stop = tx_pause, .start = tx_release, .hangup = mgslpc_hangup, .tiocmget = tiocmget, .tiocmset = tiocmset, + .proc_fops = &mgslpc_proc_fops, }; static void synclink_cs_cleanup(void) diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c index e1e0dd89ac9a..2ad813a801dc 100644 --- a/drivers/char/stallion.c +++ b/drivers/char/stallion.c @@ -32,6 +32,7 @@ #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/serial.h> +#include <linux/seq_file.h> #include <linux/cd1400.h> #include <linux/sc26198.h> #include <linux/comstats.h> @@ -1379,52 +1380,47 @@ static void stl_sendxchar(struct tty_struct *tty, char ch) stl_putchar(tty, ch); } -/*****************************************************************************/ - -#define MAXLINE 80 - -/* - * Format info for a specified port. The line is deliberately limited - * to 80 characters. (If it is too long it will be truncated, if too - * short then padded with spaces). - */ - -static int stl_portinfo(struct stlport *portp, int portnr, char *pos) +static void stl_portinfo(struct seq_file *m, struct stlport *portp, int portnr) { - char *sp; - int sigs, cnt; + int sigs; + char sep; - sp = pos; - sp += sprintf(sp, "%d: uart:%s tx:%d rx:%d", + seq_printf(m, "%d: uart:%s tx:%d rx:%d", portnr, (portp->hwid == 1) ? "SC26198" : "CD1400", (int) portp->stats.txtotal, (int) portp->stats.rxtotal); if (portp->stats.rxframing) - sp += sprintf(sp, " fe:%d", (int) portp->stats.rxframing); + seq_printf(m, " fe:%d", (int) portp->stats.rxframing); if (portp->stats.rxparity) - sp += sprintf(sp, " pe:%d", (int) portp->stats.rxparity); + seq_printf(m, " pe:%d", (int) portp->stats.rxparity); if (portp->stats.rxbreaks) - sp += sprintf(sp, " brk:%d", (int) portp->stats.rxbreaks); + seq_printf(m, " brk:%d", (int) portp->stats.rxbreaks); if (portp->stats.rxoverrun) - sp += sprintf(sp, " oe:%d", (int) portp->stats.rxoverrun); + seq_printf(m, " oe:%d", (int) portp->stats.rxoverrun); sigs = stl_getsignals(portp); - cnt = sprintf(sp, "%s%s%s%s%s ", - (sigs & TIOCM_RTS) ? "|RTS" : "", - (sigs & TIOCM_CTS) ? "|CTS" : "", - (sigs & TIOCM_DTR) ? "|DTR" : "", - (sigs & TIOCM_CD) ? "|DCD" : "", - (sigs & TIOCM_DSR) ? "|DSR" : ""); - *sp = ' '; - sp += cnt; - - for (cnt = sp - pos; cnt < (MAXLINE - 1); cnt++) - *sp++ = ' '; - if (cnt >= MAXLINE) - pos[(MAXLINE - 2)] = '+'; - pos[(MAXLINE - 1)] = '\n'; - - return MAXLINE; + sep = ' '; + if (sigs & TIOCM_RTS) { + seq_printf(m, "%c%s", sep, "RTS"); + sep = '|'; + } + if (sigs & TIOCM_CTS) { + seq_printf(m, "%c%s", sep, "CTS"); + sep = '|'; + } + if (sigs & TIOCM_DTR) { + seq_printf(m, "%c%s", sep, "DTR"); + sep = '|'; + } + if (sigs & TIOCM_CD) { + seq_printf(m, "%c%s", sep, "DCD"); + sep = '|'; + } + if (sigs & TIOCM_DSR) { + seq_printf(m, "%c%s", sep, "DSR"); + sep = '|'; + } + seq_putc(m, '\n'); } /*****************************************************************************/ @@ -1433,30 +1429,17 @@ static int stl_portinfo(struct stlport *portp, int portnr, char *pos) * Port info, read from the /proc file system. */ -static int stl_readproc(char *page, char **start, off_t off, int count, int *eof, void *data) +static int stl_proc_show(struct seq_file *m, void *v) { struct stlbrd *brdp; struct stlpanel *panelp; struct stlport *portp; unsigned int brdnr, panelnr, portnr; - int totalport, curoff, maxoff; - char *pos; + int totalport; - pr_debug("stl_readproc(page=%p,start=%p,off=%lx,count=%d,eof=%p," - "data=%p\n", page, start, off, count, eof, data); - - pos = page; totalport = 0; - curoff = 0; - - if (off == 0) { - pos += sprintf(pos, "%s: version %s", stl_drvtitle, - stl_drvversion); - while (pos < (page + MAXLINE - 1)) - *pos++ = ' '; - *pos++ = '\n'; - } - curoff = MAXLINE; + + seq_printf(m, "%s: version %s\n", stl_drvtitle, stl_drvversion); /* * We scan through for each board, panel and port. The offset is @@ -1469,46 +1452,37 @@ static int stl_readproc(char *page, char **start, off_t off, int count, int *eof if (brdp->state == 0) continue; - maxoff = curoff + (brdp->nrports * MAXLINE); - if (off >= maxoff) { - curoff = maxoff; - continue; - } - totalport = brdnr * STL_MAXPORTS; for (panelnr = 0; panelnr < brdp->nrpanels; panelnr++) { panelp = brdp->panels[panelnr]; if (panelp == NULL) continue; - maxoff = curoff + (panelp->nrports * MAXLINE); - if (off >= maxoff) { - curoff = maxoff; - totalport += panelp->nrports; - continue; - } - for (portnr = 0; portnr < panelp->nrports; portnr++, totalport++) { portp = panelp->ports[portnr]; if (portp == NULL) continue; - if (off >= (curoff += MAXLINE)) - continue; - if ((pos - page + MAXLINE) > count) - goto stl_readdone; - pos += stl_portinfo(portp, totalport, pos); + stl_portinfo(m, portp, totalport); } } } + return 0; +} - *eof = 1; - -stl_readdone: - *start = page; - return pos - page; +static int stl_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, stl_proc_show, NULL); } +static const struct file_operations stl_proc_fops = { + .owner = THIS_MODULE, + .open = stl_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /*****************************************************************************/ /* @@ -2566,9 +2540,9 @@ static const struct tty_operations stl_ops = { .break_ctl = stl_breakctl, .wait_until_sent = stl_waituntilsent, .send_xchar = stl_sendxchar, - .read_proc = stl_readproc, .tiocmget = stl_tiocmget, .tiocmset = stl_tiocmset, + .proc_fops = &stl_proc_fops, }; static const struct tty_port_operations stl_port_ops = { diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index 0057a8f58cb1..afd0b26ca056 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -79,6 +79,7 @@ #include <linux/ptrace.h> #include <linux/ioport.h> #include <linux/mm.h> +#include <linux/seq_file.h> #include <linux/slab.h> #include <linux/delay.h> #include <linux/netdevice.h> @@ -3459,18 +3460,17 @@ cleanup: * /proc fs routines.... */ -static inline int line_info(char *buf, struct mgsl_struct *info) +static inline void line_info(struct seq_file *m, struct mgsl_struct *info) { char stat_buf[30]; - int ret; unsigned long flags; if (info->bus_type == MGSL_BUS_TYPE_PCI) { - ret = sprintf(buf, "%s:PCI io:%04X irq:%d mem:%08X lcr:%08X", + seq_printf(m, "%s:PCI io:%04X irq:%d mem:%08X lcr:%08X", info->device_name, info->io_base, info->irq_level, info->phys_memory_base, info->phys_lcr_base); } else { - ret = sprintf(buf, "%s:(E)ISA io:%04X irq:%d dma:%d", + seq_printf(m, "%s:(E)ISA io:%04X irq:%d dma:%d", info->device_name, info->io_base, info->irq_level, info->dma_level); } @@ -3497,37 +3497,37 @@ static inline int line_info(char *buf, struct mgsl_struct *info) if (info->params.mode == MGSL_MODE_HDLC || info->params.mode == MGSL_MODE_RAW ) { - ret += sprintf(buf+ret, " HDLC txok:%d rxok:%d", + seq_printf(m, " HDLC txok:%d rxok:%d", info->icount.txok, info->icount.rxok); if (info->icount.txunder) - ret += sprintf(buf+ret, " txunder:%d", info->icount.txunder); + seq_printf(m, " txunder:%d", info->icount.txunder); if (info->icount.txabort) - ret += sprintf(buf+ret, " txabort:%d", info->icount.txabort); + seq_printf(m, " txabort:%d", info->icount.txabort); if (info->icount.rxshort) - ret += sprintf(buf+ret, " rxshort:%d", info->icount.rxshort); + seq_printf(m, " rxshort:%d", info->icount.rxshort); if (info->icount.rxlong) - ret += sprintf(buf+ret, " rxlong:%d", info->icount.rxlong); + seq_printf(m, " rxlong:%d", info->icount.rxlong); if (info->icount.rxover) - ret += sprintf(buf+ret, " rxover:%d", info->icount.rxover); + seq_printf(m, " rxover:%d", info->icount.rxover); if (info->icount.rxcrc) - ret += sprintf(buf+ret, " rxcrc:%d", info->icount.rxcrc); + seq_printf(m, " rxcrc:%d", info->icount.rxcrc); } else { - ret += sprintf(buf+ret, " ASYNC tx:%d rx:%d", + seq_printf(m, " ASYNC tx:%d rx:%d", info->icount.tx, info->icount.rx); if (info->icount.frame) - ret += sprintf(buf+ret, " fe:%d", info->icount.frame); + seq_printf(m, " fe:%d", info->icount.frame); if (info->icount.parity) - ret += sprintf(buf+ret, " pe:%d", info->icount.parity); + seq_printf(m, " pe:%d", info->icount.parity); if (info->icount.brk) - ret += sprintf(buf+ret, " brk:%d", info->icount.brk); + seq_printf(m, " brk:%d", info->icount.brk); if (info->icount.overrun) - ret += sprintf(buf+ret, " oe:%d", info->icount.overrun); + seq_printf(m, " oe:%d", info->icount.overrun); } /* Append serial signal status to end */ - ret += sprintf(buf+ret, " %s\n", stat_buf+1); + seq_printf(m, " %s\n", stat_buf+1); - ret += sprintf(buf+ret, "txactive=%d bh_req=%d bh_run=%d pending_bh=%x\n", + seq_printf(m, "txactive=%d bh_req=%d bh_run=%d pending_bh=%x\n", info->tx_active,info->bh_requested,info->bh_running, info->pending_bh); @@ -3544,60 +3544,40 @@ static inline int line_info(char *buf, struct mgsl_struct *info) u16 Tmr = usc_InReg( info, TMR ); u16 Tccr = usc_InReg( info, TCCR ); u16 Ccar = inw( info->io_base + CCAR ); - ret += sprintf(buf+ret, "tcsr=%04X tdmr=%04X ticr=%04X rcsr=%04X rdmr=%04X\n" + seq_printf(m, "tcsr=%04X tdmr=%04X ticr=%04X rcsr=%04X rdmr=%04X\n" "ricr=%04X icr =%04X dccr=%04X tmr=%04X tccr=%04X ccar=%04X\n", Tcsr,Tdmr,Ticr,Rscr,Rdmr,Ricr,Icr,Dccr,Tmr,Tccr,Ccar ); } spin_unlock_irqrestore(&info->irq_spinlock,flags); - - return ret; - -} /* end of line_info() */ +} -/* mgsl_read_proc() - * - * Called to print information about devices - * - * Arguments: - * page page of memory to hold returned info - * start - * off - * count - * eof - * data - * - * Return Value: - */ -static int mgsl_read_proc(char *page, char **start, off_t off, int count, - int *eof, void *data) +/* Called to print information about devices */ +static int mgsl_proc_show(struct seq_file *m, void *v) { - int len = 0, l; - off_t begin = 0; struct mgsl_struct *info; - len += sprintf(page, "synclink driver:%s\n", driver_version); + seq_printf(m, "synclink driver:%s\n", driver_version); info = mgsl_device_list; while( info ) { - l = line_info(page + len, info); - len += l; - if (len+begin > off+count) - goto done; - if (len+begin < off) { - begin += len; - len = 0; - } + line_info(m, info); info = info->next_device; } + return 0; +} - *eof = 1; -done: - if (off >= len+begin) - return 0; - *start = page + (off-begin); - return ((count < begin+len-off) ? count : begin+len-off); - -} /* end of mgsl_read_proc() */ +static int mgsl_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, mgsl_proc_show, NULL); +} + +static const struct file_operations mgsl_proc_fops = { + .owner = THIS_MODULE, + .open = mgsl_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; /* mgsl_allocate_dma_buffers() * @@ -4335,13 +4315,13 @@ static const struct tty_operations mgsl_ops = { .send_xchar = mgsl_send_xchar, .break_ctl = mgsl_break, .wait_until_sent = mgsl_wait_until_sent, - .read_proc = mgsl_read_proc, .set_termios = mgsl_set_termios, .stop = mgsl_stop, .start = mgsl_start, .hangup = mgsl_hangup, .tiocmget = tiocmget, .tiocmset = tiocmset, + .proc_fops = &mgsl_proc_fops, }; /* diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index efb3dc928a43..6ec6e13d47d7 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -60,6 +60,7 @@ #include <linux/ptrace.h> #include <linux/ioport.h> #include <linux/mm.h> +#include <linux/seq_file.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/vmalloc.h> @@ -154,7 +155,6 @@ static void tx_hold(struct tty_struct *tty); static void tx_release(struct tty_struct *tty); static int ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); -static int read_proc(char *page, char **start, off_t off, int count,int *eof, void *data); static int chars_in_buffer(struct tty_struct *tty); static void throttle(struct tty_struct * tty); static void unthrottle(struct tty_struct * tty); @@ -1229,13 +1229,12 @@ static long slgt_compat_ioctl(struct tty_struct *tty, struct file *file, /* * proc fs support */ -static inline int line_info(char *buf, struct slgt_info *info) +static inline void line_info(struct seq_file *m, struct slgt_info *info) { char stat_buf[30]; - int ret; unsigned long flags; - ret = sprintf(buf, "%s: IO=%08X IRQ=%d MaxFrameSize=%u\n", + seq_printf(m, "%s: IO=%08X IRQ=%d MaxFrameSize=%u\n", info->device_name, info->phys_reg_addr, info->irq_level, info->max_frame_size); @@ -1260,75 +1259,70 @@ static inline int line_info(char *buf, struct slgt_info *info) strcat(stat_buf, "|RI"); if (info->params.mode != MGSL_MODE_ASYNC) { - ret += sprintf(buf+ret, "\tHDLC txok:%d rxok:%d", + seq_printf(m, "\tHDLC txok:%d rxok:%d", info->icount.txok, info->icount.rxok); if (info->icount.txunder) - ret += sprintf(buf+ret, " txunder:%d", info->icount.txunder); + seq_printf(m, " txunder:%d", info->icount.txunder); if (info->icount.txabort) - ret += sprintf(buf+ret, " txabort:%d", info->icount.txabort); + seq_printf(m, " txabort:%d", info->icount.txabort); if (info->icount.rxshort) - ret += sprintf(buf+ret, " rxshort:%d", info->icount.rxshort); + seq_printf(m, " rxshort:%d", info->icount.rxshort); if (info->icount.rxlong) - ret += sprintf(buf+ret, " rxlong:%d", info->icount.rxlong); + seq_printf(m, " rxlong:%d", info->icount.rxlong); if (info->icount.rxover) - ret += sprintf(buf+ret, " rxover:%d", info->icount.rxover); + seq_printf(m, " rxover:%d", info->icount.rxover); if (info->icount.rxcrc) - ret += sprintf(buf+ret, " rxcrc:%d", info->icount.rxcrc); + seq_printf(m, " rxcrc:%d", info->icount.rxcrc); } else { - ret += sprintf(buf+ret, "\tASYNC tx:%d rx:%d", + seq_printf(m, "\tASYNC tx:%d rx:%d", info->icount.tx, info->icount.rx); if (info->icount.frame) - ret += sprintf(buf+ret, " fe:%d", info->icount.frame); + seq_printf(m, " fe:%d", info->icount.frame); if (info->icount.parity) - ret += sprintf(buf+ret, " pe:%d", info->icount.parity); + seq_printf(m, " pe:%d", info->icount.parity); if (info->icount.brk) - ret += sprintf(buf+ret, " brk:%d", info->icount.brk); + seq_printf(m, " brk:%d", info->icount.brk); if (info->icount.overrun) - ret += sprintf(buf+ret, " oe:%d", info->icount.overrun); + seq_printf(m, " oe:%d", info->icount.overrun); } /* Append serial signal status to end */ - ret += sprintf(buf+ret, " %s\n", stat_buf+1); + seq_printf(m, " %s\n", stat_buf+1); - ret += sprintf(buf+ret, "\ttxactive=%d bh_req=%d bh_run=%d pending_bh=%x\n", + seq_printf(m, "\ttxactive=%d bh_req=%d bh_run=%d pending_bh=%x\n", info->tx_active,info->bh_requested,info->bh_running, info->pending_bh); - - return ret; } /* Called to print information about devices */ -static int read_proc(char *page, char **start, off_t off, int count, - int *eof, void *data) +static int synclink_gt_proc_show(struct seq_file *m, void *v) { - int len = 0, l; - off_t begin = 0; struct slgt_info *info; - len += sprintf(page, "synclink_gt driver\n"); + seq_puts(m, "synclink_gt driver\n"); info = slgt_device_list; while( info ) { - l = line_info(page + len, info); - len += l; - if (len+begin > off+count) - goto done; - if (len+begin < off) { - begin += len; - len = 0; - } + line_info(m, info); info = info->next_device; } + return 0; +} - *eof = 1; -done: - if (off >= len+begin) - return 0; - *start = page + (off-begin); - return ((count < begin+len-off) ? count : begin+len-off); +static int synclink_gt_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, synclink_gt_proc_show, NULL); } +static const struct file_operations synclink_gt_proc_fops = { + .owner = THIS_MODULE, + .open = synclink_gt_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* * return count of bytes in transmit buffer */ @@ -3562,13 +3556,13 @@ static const struct tty_operations ops = { .send_xchar = send_xchar, .break_ctl = set_break, .wait_until_sent = wait_until_sent, - .read_proc = read_proc, .set_termios = set_termios, .stop = tx_hold, .start = tx_release, .hangup = hangup, .tiocmget = tiocmget, .tiocmset = tiocmset, + .proc_fops = &synclink_gt_proc_fops, }; static void slgt_cleanup(void) diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index 8eb6c89a980e..26de60efe4b2 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -50,6 +50,7 @@ #include <linux/ptrace.h> #include <linux/ioport.h> #include <linux/mm.h> +#include <linux/seq_file.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/vmalloc.h> @@ -520,7 +521,6 @@ static void tx_hold(struct tty_struct *tty); static void tx_release(struct tty_struct *tty); static int ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); -static int read_proc(char *page, char **start, off_t off, int count,int *eof, void *data); static int chars_in_buffer(struct tty_struct *tty); static void throttle(struct tty_struct * tty); static void unthrottle(struct tty_struct * tty); @@ -1354,13 +1354,12 @@ static int ioctl(struct tty_struct *tty, struct file *file, * /proc fs routines.... */ -static inline int line_info(char *buf, SLMP_INFO *info) +static inline void line_info(struct seq_file *m, SLMP_INFO *info) { char stat_buf[30]; - int ret; unsigned long flags; - ret = sprintf(buf, "%s: SCABase=%08x Mem=%08X StatusControl=%08x LCR=%08X\n" + seq_printf(m, "%s: SCABase=%08x Mem=%08X StatusControl=%08x LCR=%08X\n" "\tIRQ=%d MaxFrameSize=%u\n", info->device_name, info->phys_sca_base, @@ -1391,75 +1390,70 @@ static inline int line_info(char *buf, SLMP_INFO *info) strcat(stat_buf, "|RI"); if (info->params.mode == MGSL_MODE_HDLC) { - ret += sprintf(buf+ret, "\tHDLC txok:%d rxok:%d", + seq_printf(m, "\tHDLC txok:%d rxok:%d", info->icount.txok, info->icount.rxok); if (info->icount.txunder) - ret += sprintf(buf+ret, " txunder:%d", info->icount.txunder); + seq_printf(m, " txunder:%d", info->icount.txunder); if (info->icount.txabort) - ret += sprintf(buf+ret, " txabort:%d", info->icount.txabort); + seq_printf(m, " txabort:%d", info->icount.txabort); if (info->icount.rxshort) - ret += sprintf(buf+ret, " rxshort:%d", info->icount.rxshort); + seq_printf(m, " rxshort:%d", info->icount.rxshort); if (info->icount.rxlong) - ret += sprintf(buf+ret, " rxlong:%d", info->icount.rxlong); + seq_printf(m, " rxlong:%d", info->icount.rxlong); if (info->icount.rxover) - ret += sprintf(buf+ret, " rxover:%d", info->icount.rxover); + seq_printf(m, " rxover:%d", info->icount.rxover); if (info->icount.rxcrc) - ret += sprintf(buf+ret, " rxlong:%d", info->icount.rxcrc); + seq_printf(m, " rxlong:%d", info->icount.rxcrc); } else { - ret += sprintf(buf+ret, "\tASYNC tx:%d rx:%d", + seq_printf(m, "\tASYNC tx:%d rx:%d", info->icount.tx, info->icount.rx); if (info->icount.frame) - ret += sprintf(buf+ret, " fe:%d", info->icount.frame); + seq_printf(m, " fe:%d", info->icount.frame); if (info->icount.parity) - ret += sprintf(buf+ret, " pe:%d", info->icount.parity); + seq_printf(m, " pe:%d", info->icount.parity); if (info->icount.brk) - ret += sprintf(buf+ret, " brk:%d", info->icount.brk); + seq_printf(m, " brk:%d", info->icount.brk); if (info->icount.overrun) - ret += sprintf(buf+ret, " oe:%d", info->icount.overrun); + seq_printf(m, " oe:%d", info->icount.overrun); } /* Append serial signal status to end */ - ret += sprintf(buf+ret, " %s\n", stat_buf+1); + seq_printf(m, " %s\n", stat_buf+1); - ret += sprintf(buf+ret, "\ttxactive=%d bh_req=%d bh_run=%d pending_bh=%x\n", + seq_printf(m, "\ttxactive=%d bh_req=%d bh_run=%d pending_bh=%x\n", info->tx_active,info->bh_requested,info->bh_running, info->pending_bh); - - return ret; } /* Called to print information about devices */ -static int read_proc(char *page, char **start, off_t off, int count, - int *eof, void *data) +static int synclinkmp_proc_show(struct seq_file *m, void *v) { - int len = 0, l; - off_t begin = 0; SLMP_INFO *info; - len += sprintf(page, "synclinkmp driver:%s\n", driver_version); + seq_printf(m, "synclinkmp driver:%s\n", driver_version); info = synclinkmp_device_list; while( info ) { - l = line_info(page + len, info); - len += l; - if (len+begin > off+count) - goto done; - if (len+begin < off) { - begin += len; - len = 0; - } + line_info(m, info); info = info->next_device; } + return 0; +} - *eof = 1; -done: - if (off >= len+begin) - return 0; - *start = page + (off-begin); - return ((count < begin+len-off) ? count : begin+len-off); +static int synclinkmp_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, synclinkmp_proc_show, NULL); } +static const struct file_operations synclinkmp_proc_fops = { + .owner = THIS_MODULE, + .open = synclinkmp_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* Return the count of bytes in transmit buffer */ static int chars_in_buffer(struct tty_struct *tty) @@ -3905,13 +3899,13 @@ static const struct tty_operations ops = { .send_xchar = send_xchar, .break_ctl = set_break, .wait_until_sent = wait_until_sent, - .read_proc = read_proc, .set_termios = set_termios, .stop = tx_hold, .start = tx_release, .hangup = hangup, .tiocmget = tiocmget, .tiocmset = tiocmset, + .proc_fops = &synclinkmp_proc_fops, }; diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 33a9351c896d..ebea9b2c30a5 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -35,7 +35,7 @@ #include <linux/vt_kern.h> #include <linux/workqueue.h> #include <linux/kexec.h> -#include <linux/irq.h> +#include <linux/interrupt.h> #include <linux/hrtimer.h> #include <linux/oom.h> @@ -346,6 +346,19 @@ static struct sysrq_key_op sysrq_moom_op = { .enable_mask = SYSRQ_ENABLE_SIGNAL, }; +#ifdef CONFIG_BLOCK +static void sysrq_handle_thaw(int key, struct tty_struct *tty) +{ + emergency_thaw_all(); +} +static struct sysrq_key_op sysrq_thaw_op = { + .handler = sysrq_handle_thaw, + .help_msg = "thaw-filesystems(J)", + .action_msg = "Emergency Thaw of all frozen filesystems", + .enable_mask = SYSRQ_ENABLE_SIGNAL, +}; +#endif + static void sysrq_handle_kill(int key, struct tty_struct *tty) { send_sig_all(SIGKILL); @@ -396,9 +409,13 @@ static struct sysrq_key_op *sysrq_key_table[36] = { &sysrq_moom_op, /* f */ /* g: May be registered by ppc for kgdb */ NULL, /* g */ - NULL, /* h */ + NULL, /* h - reserved for help */ &sysrq_kill_op, /* i */ +#ifdef CONFIG_BLOCK + &sysrq_thaw_op, /* j */ +#else NULL, /* j */ +#endif &sysrq_SAK_op, /* k */ #ifdef CONFIG_SMP &sysrq_showallcpus_op, /* l */ diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 224f271d8cbe..33dac94922a7 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -464,7 +464,7 @@ void tty_wakeup(struct tty_struct *tty) tty_ldisc_deref(ld); } } - wake_up_interruptible(&tty->write_wait); + wake_up_interruptible_poll(&tty->write_wait, POLLOUT); } EXPORT_SYMBOL_GPL(tty_wakeup); @@ -587,8 +587,8 @@ static void do_tty_hangup(struct work_struct *work) * FIXME: Once we trust the LDISC code better we can wait here for * ldisc completion and fix the driver call race */ - wake_up_interruptible(&tty->write_wait); - wake_up_interruptible(&tty->read_wait); + wake_up_interruptible_poll(&tty->write_wait, POLLOUT); + wake_up_interruptible_poll(&tty->read_wait, POLLIN); /* * Shutdown the current line discipline, and reset it to * N_TTY. @@ -879,7 +879,7 @@ void stop_tty(struct tty_struct *tty) if (tty->link && tty->link->packet) { tty->ctrl_status &= ~TIOCPKT_START; tty->ctrl_status |= TIOCPKT_STOP; - wake_up_interruptible(&tty->link->read_wait); + wake_up_interruptible_poll(&tty->link->read_wait, POLLIN); } spin_unlock_irqrestore(&tty->ctrl_lock, flags); if (tty->ops->stop) @@ -913,7 +913,7 @@ void start_tty(struct tty_struct *tty) if (tty->link && tty->link->packet) { tty->ctrl_status &= ~TIOCPKT_STOP; tty->ctrl_status |= TIOCPKT_START; - wake_up_interruptible(&tty->link->read_wait); + wake_up_interruptible_poll(&tty->link->read_wait, POLLIN); } spin_unlock_irqrestore(&tty->ctrl_lock, flags); if (tty->ops->start) @@ -970,7 +970,7 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count, void tty_write_unlock(struct tty_struct *tty) { mutex_unlock(&tty->atomic_write_lock); - wake_up_interruptible(&tty->write_wait); + wake_up_interruptible_poll(&tty->write_wait, POLLOUT); } int tty_write_lock(struct tty_struct *tty, int ndelay) @@ -1623,21 +1623,21 @@ void tty_release_dev(struct file *filp) if (tty_closing) { if (waitqueue_active(&tty->read_wait)) { - wake_up(&tty->read_wait); + wake_up_poll(&tty->read_wait, POLLIN); do_sleep++; } if (waitqueue_active(&tty->write_wait)) { - wake_up(&tty->write_wait); + wake_up_poll(&tty->write_wait, POLLOUT); do_sleep++; } } if (o_tty_closing) { if (waitqueue_active(&o_tty->read_wait)) { - wake_up(&o_tty->read_wait); + wake_up_poll(&o_tty->read_wait, POLLIN); do_sleep++; } if (waitqueue_active(&o_tty->write_wait)) { - wake_up(&o_tty->write_wait); + wake_up_poll(&o_tty->write_wait, POLLOUT); do_sleep++; } } diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 51ff9b3d7ea2..ce52bf2f235e 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -571,6 +571,17 @@ config SENSORS_LM93 This driver can also be built as a module. If so, the module will be called lm93. +config SENSORS_LTC4215 + tristate "Linear Technology LTC4215" + depends on I2C && EXPERIMENTAL + default n + help + If you say yes here you get support for Linear Technology LTC4215 + Hot Swap Controller I2C interface. + + This driver can also be built as a module. If so, the module will + be called ltc4215. + config SENSORS_LTC4245 tristate "Linear Technology LTC4245" depends on I2C && EXPERIMENTAL @@ -582,6 +593,15 @@ config SENSORS_LTC4245 This driver can also be built as a module. If so, the module will be called ltc4245. +config SENSORS_LM95241 + tristate "National Semiconductor LM95241 sensor chip" + depends on I2C + help + If you say yes here you get support for LM95241 sensor chip. + + This driver can also be built as a module. If so, the module + will be called lm95241. + config SENSORS_MAX1111 tristate "Maxim MAX1111 Multichannel, Serial 8-bit ADC chip" depends on SPI_MASTER @@ -912,6 +932,22 @@ config SENSORS_LIS3LV02D Say Y here if you have an applicable laptop and want to experience the awesome power of lis3lv02d. +config SENSORS_LIS3_SPI + tristate "STMicroeletronics LIS3LV02Dx three-axis digital accelerometer (SPI)" + depends on !ACPI && SPI_MASTER && INPUT + default n + help + This driver provides support for the LIS3LV02Dx accelerometer connected + via SPI. The accelerometer data is readable via + /sys/devices/platform/lis3lv02d. + + This driver also provides an absolute input class device, allowing + the laptop to act as a pinball machine-esque joystick. + + This driver can also be built as modules. If so, the core module + will be called lis3lv02d and a specific module for the SPI transport + is called lis3lv02d_spi. + config SENSORS_APPLESMC tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)" depends on INPUT && X86 diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index e332d6267920..3a6b1f06f8f4 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_SENSORS_IBMPEX) += ibmpex.o obj-$(CONFIG_SENSORS_IT87) += it87.o obj-$(CONFIG_SENSORS_K8TEMP) += k8temp.o obj-$(CONFIG_SENSORS_LIS3LV02D) += lis3lv02d.o hp_accel.o +obj-$(CONFIG_SENSORS_LIS3_SPI) += lis3lv02d.o lis3lv02d_spi.o obj-$(CONFIG_SENSORS_LM63) += lm63.o obj-$(CONFIG_SENSORS_LM70) += lm70.o obj-$(CONFIG_SENSORS_LM75) += lm75.o @@ -64,6 +65,8 @@ obj-$(CONFIG_SENSORS_LM87) += lm87.o obj-$(CONFIG_SENSORS_LM90) += lm90.o obj-$(CONFIG_SENSORS_LM92) += lm92.o obj-$(CONFIG_SENSORS_LM93) += lm93.o +obj-$(CONFIG_SENSORS_LM95241) += lm95241.o +obj-$(CONFIG_SENSORS_LTC4215) += ltc4215.o obj-$(CONFIG_SENSORS_LTC4245) += ltc4245.o obj-$(CONFIG_SENSORS_MAX1111) += max1111.o obj-$(CONFIG_SENSORS_MAX1619) += max1619.o diff --git a/drivers/hwmon/hp_accel.c b/drivers/hwmon/hp_accel.c index 29c83b5b9697..55d3dc565be6 100644 --- a/drivers/hwmon/hp_accel.c +++ b/drivers/hwmon/hp_accel.c @@ -85,25 +85,31 @@ MODULE_DEVICE_TABLE(acpi, lis3lv02d_device_ids); /** * lis3lv02d_acpi_init - ACPI _INI method: initialize the device. - * @handle: the handle of the device + * @lis3: pointer to the device struct * - * Returns AE_OK on success. + * Returns 0 on success. */ -acpi_status lis3lv02d_acpi_init(acpi_handle handle) +int lis3lv02d_acpi_init(struct lis3lv02d *lis3) { - return acpi_evaluate_object(handle, METHOD_NAME__INI, NULL, NULL); + struct acpi_device *dev = lis3->bus_priv; + if (acpi_evaluate_object(dev->handle, METHOD_NAME__INI, + NULL, NULL) != AE_OK) + return -EINVAL; + + return 0; } /** * lis3lv02d_acpi_read - ACPI ALRD method: read a register - * @handle: the handle of the device + * @lis3: pointer to the device struct * @reg: the register to read * @ret: result of the operation * - * Returns AE_OK on success. + * Returns 0 on success. */ -acpi_status lis3lv02d_acpi_read(acpi_handle handle, int reg, u8 *ret) +int lis3lv02d_acpi_read(struct lis3lv02d *lis3, int reg, u8 *ret) { + struct acpi_device *dev = lis3->bus_priv; union acpi_object arg0 = { ACPI_TYPE_INTEGER }; struct acpi_object_list args = { 1, &arg0 }; unsigned long long lret; @@ -111,21 +117,22 @@ acpi_status lis3lv02d_acpi_read(acpi_handle handle, int reg, u8 *ret) arg0.integer.value = reg; - status = acpi_evaluate_integer(handle, "ALRD", &args, &lret); + status = acpi_evaluate_integer(dev->handle, "ALRD", &args, &lret); *ret = lret; - return status; + return (status != AE_OK) ? -EINVAL : 0; } /** * lis3lv02d_acpi_write - ACPI ALWR method: write to a register - * @handle: the handle of the device + * @lis3: pointer to the device struct * @reg: the register to write to * @val: the value to write * - * Returns AE_OK on success. + * Returns 0 on success. */ -acpi_status lis3lv02d_acpi_write(acpi_handle handle, int reg, u8 val) +int lis3lv02d_acpi_write(struct lis3lv02d *lis3, int reg, u8 val) { + struct acpi_device *dev = lis3->bus_priv; unsigned long long ret; /* Not used when writting */ union acpi_object in_obj[2]; struct acpi_object_list args = { 2, in_obj }; @@ -135,12 +142,15 @@ acpi_status lis3lv02d_acpi_write(acpi_handle handle, int reg, u8 val) in_obj[1].type = ACPI_TYPE_INTEGER; in_obj[1].integer.value = val; - return acpi_evaluate_integer(handle, "ALWR", &args, &ret); + if (acpi_evaluate_integer(dev->handle, "ALWR", &args, &ret) != AE_OK) + return -EINVAL; + + return 0; } static int lis3lv02d_dmi_matched(const struct dmi_system_id *dmi) { - adev.ac = *((struct axis_conversion *)dmi->driver_data); + lis3_dev.ac = *((struct axis_conversion *)dmi->driver_data); printk(KERN_INFO DRIVER_NAME ": hardware type %s found.\n", dmi->ident); return 1; @@ -187,6 +197,7 @@ static struct dmi_system_id lis3lv02d_dmi_ids[] = { AXIS_DMI_MATCH("NC2510", "HP Compaq 2510", y_inverted), AXIS_DMI_MATCH("NC8510", "HP Compaq 8510", xy_swap_inverted), AXIS_DMI_MATCH("HP2133", "HP 2133", xy_rotated_left), + AXIS_DMI_MATCH("HP2140", "HP 2140", xy_swap_inverted), AXIS_DMI_MATCH("NC653x", "HP Compaq 653", xy_rotated_left_usd), AXIS_DMI_MATCH("NC673x", "HP Compaq 673", xy_rotated_left_usd), AXIS_DMI_MATCH("NC651xx", "HP Compaq 651", xy_rotated_right), @@ -201,6 +212,8 @@ static struct dmi_system_id lis3lv02d_dmi_ids[] = { PRODUCT_NAME, "HP Pavilion dv5", BOARD_NAME, "3600", y_inverted), + AXIS_DMI_MATCH("DV7", "HP Pavilion dv7", x_inverted), + AXIS_DMI_MATCH("HP8710", "HP Compaq 8710", y_inverted), { NULL, } /* Laptop models without axis info (yet): * "NC6910" "HP Compaq 6910" @@ -214,7 +227,7 @@ static struct dmi_system_id lis3lv02d_dmi_ids[] = { static void hpled_set(struct delayed_led_classdev *led_cdev, enum led_brightness value) { - acpi_handle handle = adev.device->handle; + struct acpi_device *dev = lis3_dev.bus_priv; unsigned long long ret; /* Not used when writing */ union acpi_object in_obj[1]; struct acpi_object_list args = { 1, in_obj }; @@ -222,7 +235,7 @@ static void hpled_set(struct delayed_led_classdev *led_cdev, enum led_brightness in_obj[0].type = ACPI_TYPE_INTEGER; in_obj[0].integer.value = !!value; - acpi_evaluate_integer(handle, "ALED", &args, &ret); + acpi_evaluate_integer(dev->handle, "ALED", &args, &ret); } static struct delayed_led_classdev hpled_led = { @@ -254,28 +267,11 @@ static void lis3lv02d_enum_resources(struct acpi_device *device) acpi_status status; status = acpi_walk_resources(device->handle, METHOD_NAME__CRS, - lis3lv02d_get_resource, &adev.irq); + lis3lv02d_get_resource, &lis3_dev.irq); if (ACPI_FAILURE(status)) printk(KERN_DEBUG DRIVER_NAME ": Error getting resources\n"); } -static s16 lis3lv02d_read_16(acpi_handle handle, int reg) -{ - u8 lo, hi; - - adev.read(handle, reg - 1, &lo); - adev.read(handle, reg, &hi); - /* In "12 bit right justified" mode, bit 6, bit 7, bit 8 = bit 5 */ - return (s16)((hi << 8) | lo); -} - -static s16 lis3lv02d_read_8(acpi_handle handle, int reg) -{ - s8 lo; - adev.read(handle, reg, &lo); - return lo; -} - static int lis3lv02d_add(struct acpi_device *device) { int ret; @@ -283,51 +279,35 @@ static int lis3lv02d_add(struct acpi_device *device) if (!device) return -EINVAL; - adev.device = device; - adev.init = lis3lv02d_acpi_init; - adev.read = lis3lv02d_acpi_read; - adev.write = lis3lv02d_acpi_write; + lis3_dev.bus_priv = device; + lis3_dev.init = lis3lv02d_acpi_init; + lis3_dev.read = lis3lv02d_acpi_read; + lis3_dev.write = lis3lv02d_acpi_write; strcpy(acpi_device_name(device), DRIVER_NAME); strcpy(acpi_device_class(device), ACPI_MDPS_CLASS); - device->driver_data = &adev; - - lis3lv02d_acpi_read(device->handle, WHO_AM_I, &adev.whoami); - switch (adev.whoami) { - case LIS_DOUBLE_ID: - printk(KERN_INFO DRIVER_NAME ": 2-byte sensor found\n"); - adev.read_data = lis3lv02d_read_16; - adev.mdps_max_val = 2048; - break; - case LIS_SINGLE_ID: - printk(KERN_INFO DRIVER_NAME ": 1-byte sensor found\n"); - adev.read_data = lis3lv02d_read_8; - adev.mdps_max_val = 128; - break; - default: - printk(KERN_ERR DRIVER_NAME - ": unknown sensor type 0x%X\n", adev.whoami); - return -EINVAL; - } + device->driver_data = &lis3_dev; + + /* obtain IRQ number of our device from ACPI */ + lis3lv02d_enum_resources(device); /* If possible use a "standard" axes order */ if (dmi_check_system(lis3lv02d_dmi_ids) == 0) { printk(KERN_INFO DRIVER_NAME ": laptop model unknown, " "using default axes configuration\n"); - adev.ac = lis3lv02d_axis_normal; + lis3_dev.ac = lis3lv02d_axis_normal; } - INIT_WORK(&hpled_led.work, delayed_set_status_worker); - ret = led_classdev_register(NULL, &hpled_led.led_classdev); + /* call the core layer do its init */ + ret = lis3lv02d_init_device(&lis3_dev); if (ret) return ret; - /* obtain IRQ number of our device from ACPI */ - lis3lv02d_enum_resources(adev.device); - - ret = lis3lv02d_init_device(&adev); + INIT_WORK(&hpled_led.work, delayed_set_status_worker); + ret = led_classdev_register(NULL, &hpled_led.led_classdev); if (ret) { + lis3lv02d_joystick_disable(); + lis3lv02d_poweroff(&lis3_dev); flush_work(&hpled_led.work); - led_classdev_unregister(&hpled_led.led_classdev); return ret; } @@ -340,7 +320,7 @@ static int lis3lv02d_remove(struct acpi_device *device, int type) return -EINVAL; lis3lv02d_joystick_disable(); - lis3lv02d_poweroff(device->handle); + lis3lv02d_poweroff(&lis3_dev); flush_work(&hpled_led.work); led_classdev_unregister(&hpled_led.led_classdev); @@ -353,19 +333,19 @@ static int lis3lv02d_remove(struct acpi_device *device, int type) static int lis3lv02d_suspend(struct acpi_device *device, pm_message_t state) { /* make sure the device is off when we suspend */ - lis3lv02d_poweroff(device->handle); + lis3lv02d_poweroff(&lis3_dev); return 0; } static int lis3lv02d_resume(struct acpi_device *device) { /* put back the device in the right state (ACPI might turn it on) */ - mutex_lock(&adev.lock); - if (adev.usage > 0) - lis3lv02d_poweron(device->handle); + mutex_lock(&lis3_dev.lock); + if (lis3_dev.usage > 0) + lis3lv02d_poweron(&lis3_dev); else - lis3lv02d_poweroff(device->handle); - mutex_unlock(&adev.lock); + lis3lv02d_poweroff(&lis3_dev); + mutex_unlock(&lis3_dev.lock); return 0; } #else diff --git a/drivers/hwmon/lis3lv02d.c b/drivers/hwmon/lis3lv02d.c index 8bb2158f0453..778eb7795983 100644 --- a/drivers/hwmon/lis3lv02d.c +++ b/drivers/hwmon/lis3lv02d.c @@ -36,7 +36,6 @@ #include <linux/freezer.h> #include <linux/uaccess.h> #include <linux/miscdevice.h> -#include <acpi/acpi_drivers.h> #include <asm/atomic.h> #include "lis3lv02d.h" @@ -53,13 +52,30 @@ * joystick. */ -struct acpi_lis3lv02d adev = { - .misc_wait = __WAIT_QUEUE_HEAD_INITIALIZER(adev.misc_wait), +struct lis3lv02d lis3_dev = { + .misc_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lis3_dev.misc_wait), }; -EXPORT_SYMBOL_GPL(adev); +EXPORT_SYMBOL_GPL(lis3_dev); -static int lis3lv02d_add_fs(struct acpi_device *device); +static s16 lis3lv02d_read_8(struct lis3lv02d *lis3, int reg) +{ + s8 lo; + if (lis3->read(lis3, reg, &lo) < 0) + return 0; + + return lo; +} + +static s16 lis3lv02d_read_16(struct lis3lv02d *lis3, int reg) +{ + u8 lo, hi; + + lis3->read(lis3, reg - 1, &lo); + lis3->read(lis3, reg, &hi); + /* In "12 bit right justified" mode, bit 6, bit 7, bit 8 = bit 5 */ + return (s16)((hi << 8) | lo); +} /** * lis3lv02d_get_axis - For the given axis, give the value converted @@ -78,36 +94,36 @@ static inline int lis3lv02d_get_axis(s8 axis, int hw_values[3]) /** * lis3lv02d_get_xyz - Get X, Y and Z axis values from the accelerometer - * @handle: the handle to the device - * @x: where to store the X axis value - * @y: where to store the Y axis value - * @z: where to store the Z axis value + * @lis3: pointer to the device struct + * @x: where to store the X axis value + * @y: where to store the Y axis value + * @z: where to store the Z axis value * * Note that 40Hz input device can eat up about 10% CPU at 800MHZ */ -static void lis3lv02d_get_xyz(acpi_handle handle, int *x, int *y, int *z) +static void lis3lv02d_get_xyz(struct lis3lv02d *lis3, int *x, int *y, int *z) { int position[3]; - position[0] = adev.read_data(handle, OUTX); - position[1] = adev.read_data(handle, OUTY); - position[2] = adev.read_data(handle, OUTZ); + position[0] = lis3_dev.read_data(lis3, OUTX); + position[1] = lis3_dev.read_data(lis3, OUTY); + position[2] = lis3_dev.read_data(lis3, OUTZ); - *x = lis3lv02d_get_axis(adev.ac.x, position); - *y = lis3lv02d_get_axis(adev.ac.y, position); - *z = lis3lv02d_get_axis(adev.ac.z, position); + *x = lis3lv02d_get_axis(lis3_dev.ac.x, position); + *y = lis3lv02d_get_axis(lis3_dev.ac.y, position); + *z = lis3lv02d_get_axis(lis3_dev.ac.z, position); } -void lis3lv02d_poweroff(acpi_handle handle) +void lis3lv02d_poweroff(struct lis3lv02d *lis3) { - adev.is_on = 0; + lis3_dev.is_on = 0; } EXPORT_SYMBOL_GPL(lis3lv02d_poweroff); -void lis3lv02d_poweron(acpi_handle handle) +void lis3lv02d_poweron(struct lis3lv02d *lis3) { - adev.is_on = 1; - adev.init(handle); + lis3_dev.is_on = 1; + lis3_dev.init(lis3); } EXPORT_SYMBOL_GPL(lis3lv02d_poweron); @@ -116,13 +132,13 @@ EXPORT_SYMBOL_GPL(lis3lv02d_poweron); * device will always be on until a call to lis3lv02d_decrease_use(). Not to be * used from interrupt context. */ -static void lis3lv02d_increase_use(struct acpi_lis3lv02d *dev) +static void lis3lv02d_increase_use(struct lis3lv02d *dev) { mutex_lock(&dev->lock); dev->usage++; if (dev->usage == 1) { if (!dev->is_on) - lis3lv02d_poweron(dev->device->handle); + lis3lv02d_poweron(dev); } mutex_unlock(&dev->lock); } @@ -131,12 +147,12 @@ static void lis3lv02d_increase_use(struct acpi_lis3lv02d *dev) * To be called whenever a usage of the device is stopped. * It will make sure to turn off the device when there is not usage. */ -static void lis3lv02d_decrease_use(struct acpi_lis3lv02d *dev) +static void lis3lv02d_decrease_use(struct lis3lv02d *dev) { mutex_lock(&dev->lock); dev->usage--; if (dev->usage == 0) - lis3lv02d_poweroff(dev->device->handle); + lis3lv02d_poweroff(dev); mutex_unlock(&dev->lock); } @@ -147,10 +163,10 @@ static irqreturn_t lis302dl_interrupt(int irq, void *dummy) * the lid is closed. This leads to interrupts as soon as a little move * is done. */ - atomic_inc(&adev.count); + atomic_inc(&lis3_dev.count); - wake_up_interruptible(&adev.misc_wait); - kill_fasync(&adev.async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&lis3_dev.misc_wait); + kill_fasync(&lis3_dev.async_queue, SIGIO, POLL_IN); return IRQ_HANDLED; } @@ -158,10 +174,10 @@ static int lis3lv02d_misc_open(struct inode *inode, struct file *file) { int ret; - if (test_and_set_bit(0, &adev.misc_opened)) + if (test_and_set_bit(0, &lis3_dev.misc_opened)) return -EBUSY; /* already open */ - atomic_set(&adev.count, 0); + atomic_set(&lis3_dev.count, 0); /* * The sensor can generate interrupts for free-fall and direction @@ -174,25 +190,25 @@ static int lis3lv02d_misc_open(struct inode *inode, struct file *file) * io-apic is not configurable (and generates a warning) but I keep it * in case of support for other hardware. */ - ret = request_irq(adev.irq, lis302dl_interrupt, IRQF_TRIGGER_RISING, - DRIVER_NAME, &adev); + ret = request_irq(lis3_dev.irq, lis302dl_interrupt, IRQF_TRIGGER_RISING, + DRIVER_NAME, &lis3_dev); if (ret) { - clear_bit(0, &adev.misc_opened); - printk(KERN_ERR DRIVER_NAME ": IRQ%d allocation failed\n", adev.irq); + clear_bit(0, &lis3_dev.misc_opened); + printk(KERN_ERR DRIVER_NAME ": IRQ%d allocation failed\n", lis3_dev.irq); return -EBUSY; } - lis3lv02d_increase_use(&adev); - printk("lis3: registered interrupt %d\n", adev.irq); + lis3lv02d_increase_use(&lis3_dev); + printk("lis3: registered interrupt %d\n", lis3_dev.irq); return 0; } static int lis3lv02d_misc_release(struct inode *inode, struct file *file) { - fasync_helper(-1, file, 0, &adev.async_queue); - lis3lv02d_decrease_use(&adev); - free_irq(adev.irq, &adev); - clear_bit(0, &adev.misc_opened); /* release the device */ + fasync_helper(-1, file, 0, &lis3_dev.async_queue); + lis3lv02d_decrease_use(&lis3_dev); + free_irq(lis3_dev.irq, &lis3_dev); + clear_bit(0, &lis3_dev.misc_opened); /* release the device */ return 0; } @@ -207,10 +223,10 @@ static ssize_t lis3lv02d_misc_read(struct file *file, char __user *buf, if (count < 1) return -EINVAL; - add_wait_queue(&adev.misc_wait, &wait); + add_wait_queue(&lis3_dev.misc_wait, &wait); while (true) { set_current_state(TASK_INTERRUPTIBLE); - data = atomic_xchg(&adev.count, 0); + data = atomic_xchg(&lis3_dev.count, 0); if (data) break; @@ -240,22 +256,22 @@ static ssize_t lis3lv02d_misc_read(struct file *file, char __user *buf, out: __set_current_state(TASK_RUNNING); - remove_wait_queue(&adev.misc_wait, &wait); + remove_wait_queue(&lis3_dev.misc_wait, &wait); return retval; } static unsigned int lis3lv02d_misc_poll(struct file *file, poll_table *wait) { - poll_wait(file, &adev.misc_wait, wait); - if (atomic_read(&adev.count)) + poll_wait(file, &lis3_dev.misc_wait, wait); + if (atomic_read(&lis3_dev.count)) return POLLIN | POLLRDNORM; return 0; } static int lis3lv02d_misc_fasync(int fd, struct file *file, int on) { - return fasync_helper(fd, file, on, &adev.async_queue); + return fasync_helper(fd, file, on, &lis3_dev.async_queue); } static const struct file_operations lis3lv02d_misc_fops = { @@ -283,12 +299,12 @@ static int lis3lv02d_joystick_kthread(void *data) int x, y, z; while (!kthread_should_stop()) { - lis3lv02d_get_xyz(adev.device->handle, &x, &y, &z); - input_report_abs(adev.idev, ABS_X, x - adev.xcalib); - input_report_abs(adev.idev, ABS_Y, y - adev.ycalib); - input_report_abs(adev.idev, ABS_Z, z - adev.zcalib); + lis3lv02d_get_xyz(&lis3_dev, &x, &y, &z); + input_report_abs(lis3_dev.idev, ABS_X, x - lis3_dev.xcalib); + input_report_abs(lis3_dev.idev, ABS_Y, y - lis3_dev.ycalib); + input_report_abs(lis3_dev.idev, ABS_Z, z - lis3_dev.zcalib); - input_sync(adev.idev); + input_sync(lis3_dev.idev); try_to_freeze(); msleep_interruptible(MDPS_POLL_INTERVAL); @@ -299,11 +315,11 @@ static int lis3lv02d_joystick_kthread(void *data) static int lis3lv02d_joystick_open(struct input_dev *input) { - lis3lv02d_increase_use(&adev); - adev.kthread = kthread_run(lis3lv02d_joystick_kthread, NULL, "klis3lv02d"); - if (IS_ERR(adev.kthread)) { - lis3lv02d_decrease_use(&adev); - return PTR_ERR(adev.kthread); + lis3lv02d_increase_use(&lis3_dev); + lis3_dev.kthread = kthread_run(lis3lv02d_joystick_kthread, NULL, "klis3lv02d"); + if (IS_ERR(lis3_dev.kthread)) { + lis3lv02d_decrease_use(&lis3_dev); + return PTR_ERR(lis3_dev.kthread); } return 0; @@ -311,45 +327,46 @@ static int lis3lv02d_joystick_open(struct input_dev *input) static void lis3lv02d_joystick_close(struct input_dev *input) { - kthread_stop(adev.kthread); - lis3lv02d_decrease_use(&adev); + kthread_stop(lis3_dev.kthread); + lis3lv02d_decrease_use(&lis3_dev); } static inline void lis3lv02d_calibrate_joystick(void) { - lis3lv02d_get_xyz(adev.device->handle, &adev.xcalib, &adev.ycalib, &adev.zcalib); + lis3lv02d_get_xyz(&lis3_dev, + &lis3_dev.xcalib, &lis3_dev.ycalib, &lis3_dev.zcalib); } int lis3lv02d_joystick_enable(void) { int err; - if (adev.idev) + if (lis3_dev.idev) return -EINVAL; - adev.idev = input_allocate_device(); - if (!adev.idev) + lis3_dev.idev = input_allocate_device(); + if (!lis3_dev.idev) return -ENOMEM; lis3lv02d_calibrate_joystick(); - adev.idev->name = "ST LIS3LV02DL Accelerometer"; - adev.idev->phys = DRIVER_NAME "/input0"; - adev.idev->id.bustype = BUS_HOST; - adev.idev->id.vendor = 0; - adev.idev->dev.parent = &adev.pdev->dev; - adev.idev->open = lis3lv02d_joystick_open; - adev.idev->close = lis3lv02d_joystick_close; + lis3_dev.idev->name = "ST LIS3LV02DL Accelerometer"; + lis3_dev.idev->phys = DRIVER_NAME "/input0"; + lis3_dev.idev->id.bustype = BUS_HOST; + lis3_dev.idev->id.vendor = 0; + lis3_dev.idev->dev.parent = &lis3_dev.pdev->dev; + lis3_dev.idev->open = lis3lv02d_joystick_open; + lis3_dev.idev->close = lis3lv02d_joystick_close; - set_bit(EV_ABS, adev.idev->evbit); - input_set_abs_params(adev.idev, ABS_X, -adev.mdps_max_val, adev.mdps_max_val, 3, 3); - input_set_abs_params(adev.idev, ABS_Y, -adev.mdps_max_val, adev.mdps_max_val, 3, 3); - input_set_abs_params(adev.idev, ABS_Z, -adev.mdps_max_val, adev.mdps_max_val, 3, 3); + set_bit(EV_ABS, lis3_dev.idev->evbit); + input_set_abs_params(lis3_dev.idev, ABS_X, -lis3_dev.mdps_max_val, lis3_dev.mdps_max_val, 3, 3); + input_set_abs_params(lis3_dev.idev, ABS_Y, -lis3_dev.mdps_max_val, lis3_dev.mdps_max_val, 3, 3); + input_set_abs_params(lis3_dev.idev, ABS_Z, -lis3_dev.mdps_max_val, lis3_dev.mdps_max_val, 3, 3); - err = input_register_device(adev.idev); + err = input_register_device(lis3_dev.idev); if (err) { - input_free_device(adev.idev); - adev.idev = NULL; + input_free_device(lis3_dev.idev); + lis3_dev.idev = NULL; } return err; @@ -358,71 +375,40 @@ EXPORT_SYMBOL_GPL(lis3lv02d_joystick_enable); void lis3lv02d_joystick_disable(void) { - if (!adev.idev) + if (!lis3_dev.idev) return; misc_deregister(&lis3lv02d_misc_device); - input_unregister_device(adev.idev); - adev.idev = NULL; + input_unregister_device(lis3_dev.idev); + lis3_dev.idev = NULL; } EXPORT_SYMBOL_GPL(lis3lv02d_joystick_disable); -/* - * Initialise the accelerometer and the various subsystems. - * Should be rather independant of the bus system. - */ -int lis3lv02d_init_device(struct acpi_lis3lv02d *dev) -{ - mutex_init(&dev->lock); - lis3lv02d_add_fs(dev->device); - lis3lv02d_increase_use(dev); - - if (lis3lv02d_joystick_enable()) - printk(KERN_ERR DRIVER_NAME ": joystick initialization failed\n"); - - printk("lis3_init_device: irq %d\n", dev->irq); - - /* if we did not get an IRQ from ACPI - we have nothing more to do */ - if (!dev->irq) { - printk(KERN_ERR DRIVER_NAME - ": No IRQ in ACPI. Disabling /dev/freefall\n"); - goto out; - } - - printk("lis3: registering device\n"); - if (misc_register(&lis3lv02d_misc_device)) - printk(KERN_ERR DRIVER_NAME ": misc_register failed\n"); -out: - lis3lv02d_decrease_use(dev); - return 0; -} -EXPORT_SYMBOL_GPL(lis3lv02d_init_device); - /* Sysfs stuff */ static ssize_t lis3lv02d_position_show(struct device *dev, struct device_attribute *attr, char *buf) { int x, y, z; - lis3lv02d_increase_use(&adev); - lis3lv02d_get_xyz(adev.device->handle, &x, &y, &z); - lis3lv02d_decrease_use(&adev); + lis3lv02d_increase_use(&lis3_dev); + lis3lv02d_get_xyz(&lis3_dev, &x, &y, &z); + lis3lv02d_decrease_use(&lis3_dev); return sprintf(buf, "(%d,%d,%d)\n", x, y, z); } static ssize_t lis3lv02d_calibrate_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "(%d,%d,%d)\n", adev.xcalib, adev.ycalib, adev.zcalib); + return sprintf(buf, "(%d,%d,%d)\n", lis3_dev.xcalib, lis3_dev.ycalib, lis3_dev.zcalib); } static ssize_t lis3lv02d_calibrate_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - lis3lv02d_increase_use(&adev); + lis3lv02d_increase_use(&lis3_dev); lis3lv02d_calibrate_joystick(); - lis3lv02d_decrease_use(&adev); + lis3lv02d_decrease_use(&lis3_dev); return count; } @@ -434,9 +420,9 @@ static ssize_t lis3lv02d_rate_show(struct device *dev, u8 ctrl; int val; - lis3lv02d_increase_use(&adev); - adev.read(adev.device->handle, CTRL_REG1, &ctrl); - lis3lv02d_decrease_use(&adev); + lis3lv02d_increase_use(&lis3_dev); + lis3_dev.read(&lis3_dev, CTRL_REG1, &ctrl); + lis3lv02d_decrease_use(&lis3_dev); val = (ctrl & (CTRL1_DF0 | CTRL1_DF1)) >> 4; return sprintf(buf, "%d\n", lis3lv02dl_df_val[val]); } @@ -458,23 +444,73 @@ static struct attribute_group lis3lv02d_attribute_group = { }; -static int lis3lv02d_add_fs(struct acpi_device *device) +static int lis3lv02d_add_fs(struct lis3lv02d *lis3) { - adev.pdev = platform_device_register_simple(DRIVER_NAME, -1, NULL, 0); - if (IS_ERR(adev.pdev)) - return PTR_ERR(adev.pdev); + lis3_dev.pdev = platform_device_register_simple(DRIVER_NAME, -1, NULL, 0); + if (IS_ERR(lis3_dev.pdev)) + return PTR_ERR(lis3_dev.pdev); - return sysfs_create_group(&adev.pdev->dev.kobj, &lis3lv02d_attribute_group); + return sysfs_create_group(&lis3_dev.pdev->dev.kobj, &lis3lv02d_attribute_group); } int lis3lv02d_remove_fs(void) { - sysfs_remove_group(&adev.pdev->dev.kobj, &lis3lv02d_attribute_group); - platform_device_unregister(adev.pdev); + sysfs_remove_group(&lis3_dev.pdev->dev.kobj, &lis3lv02d_attribute_group); + platform_device_unregister(lis3_dev.pdev); return 0; } EXPORT_SYMBOL_GPL(lis3lv02d_remove_fs); +/* + * Initialise the accelerometer and the various subsystems. + * Should be rather independant of the bus system. + */ +int lis3lv02d_init_device(struct lis3lv02d *dev) +{ + dev->whoami = lis3lv02d_read_8(dev, WHO_AM_I); + + switch (dev->whoami) { + case LIS_DOUBLE_ID: + printk(KERN_INFO DRIVER_NAME ": 2-byte sensor found\n"); + dev->read_data = lis3lv02d_read_16; + dev->mdps_max_val = 2048; + break; + case LIS_SINGLE_ID: + printk(KERN_INFO DRIVER_NAME ": 1-byte sensor found\n"); + dev->read_data = lis3lv02d_read_8; + dev->mdps_max_val = 128; + break; + default: + printk(KERN_ERR DRIVER_NAME + ": unknown sensor type 0x%X\n", lis3_dev.whoami); + return -EINVAL; + } + + mutex_init(&dev->lock); + lis3lv02d_add_fs(dev); + lis3lv02d_increase_use(dev); + + if (lis3lv02d_joystick_enable()) + printk(KERN_ERR DRIVER_NAME ": joystick initialization failed\n"); + + printk("lis3_init_device: irq %d\n", dev->irq); + + /* bail if we did not get an IRQ from the bus layer */ + if (!dev->irq) { + printk(KERN_ERR DRIVER_NAME + ": No IRQ. Disabling /dev/freefall\n"); + goto out; + } + + printk("lis3: registering device\n"); + if (misc_register(&lis3lv02d_misc_device)) + printk(KERN_ERR DRIVER_NAME ": misc_register failed\n"); +out: + lis3lv02d_decrease_use(dev); + return 0; +} +EXPORT_SYMBOL_GPL(lis3lv02d_init_device); + MODULE_DESCRIPTION("ST LIS3LV02Dx three-axis digital accelerometer driver"); MODULE_AUTHOR("Yan Burman, Eric Piel, Pavel Machek"); MODULE_LICENSE("GPL"); diff --git a/drivers/hwmon/lis3lv02d.h b/drivers/hwmon/lis3lv02d.h index 75972bf372ff..745ec96806d4 100644 --- a/drivers/hwmon/lis3lv02d.h +++ b/drivers/hwmon/lis3lv02d.h @@ -159,14 +159,14 @@ struct axis_conversion { s8 z; }; -struct acpi_lis3lv02d { - struct acpi_device *device; /* The ACPI device */ - acpi_status (*init) (acpi_handle handle); - acpi_status (*write) (acpi_handle handle, int reg, u8 val); - acpi_status (*read) (acpi_handle handle, int reg, u8 *ret); +struct lis3lv02d { + void *bus_priv; /* used by the bus layer only */ + int (*init) (struct lis3lv02d *lis3); + int (*write) (struct lis3lv02d *lis3, int reg, u8 val); + int (*read) (struct lis3lv02d *lis3, int reg, u8 *ret); u8 whoami; /* 3Ah: 2-byte registries, 3Bh: 1-byte registries */ - s16 (*read_data) (acpi_handle handle, int reg); + s16 (*read_data) (struct lis3lv02d *lis3, int reg); int mdps_max_val; struct input_dev *idev; /* input device */ @@ -187,11 +187,11 @@ struct acpi_lis3lv02d { unsigned long misc_opened; /* bit0: whether the device is open */ }; -int lis3lv02d_init_device(struct acpi_lis3lv02d *dev); +int lis3lv02d_init_device(struct lis3lv02d *lis3); int lis3lv02d_joystick_enable(void); void lis3lv02d_joystick_disable(void); -void lis3lv02d_poweroff(acpi_handle handle); -void lis3lv02d_poweron(acpi_handle handle); +void lis3lv02d_poweroff(struct lis3lv02d *lis3); +void lis3lv02d_poweron(struct lis3lv02d *lis3); int lis3lv02d_remove_fs(void); -extern struct acpi_lis3lv02d adev; +extern struct lis3lv02d lis3_dev; diff --git a/drivers/hwmon/lis3lv02d_spi.c b/drivers/hwmon/lis3lv02d_spi.c new file mode 100644 index 000000000000..07ae74b0e191 --- /dev/null +++ b/drivers/hwmon/lis3lv02d_spi.c @@ -0,0 +1,114 @@ +/* + * lis3lv02d_spi - SPI glue layer for lis3lv02d + * + * Copyright (c) 2009 Daniel Mack <daniel@caiaq.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * publishhed by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/err.h> +#include <linux/input.h> +#include <linux/interrupt.h> +#include <linux/workqueue.h> +#include <linux/spi/spi.h> + +#include "lis3lv02d.h" + +#define DRV_NAME "lis3lv02d_spi" +#define LIS3_SPI_READ 0x80 + +static int lis3_spi_read(struct lis3lv02d *lis3, int reg, u8 *v) +{ + struct spi_device *spi = lis3->bus_priv; + int ret = spi_w8r8(spi, reg | LIS3_SPI_READ); + if (ret < 0) + return -EINVAL; + + *v = (u8) ret; + return 0; +} + +static int lis3_spi_write(struct lis3lv02d *lis3, int reg, u8 val) +{ + u8 tmp[2] = { reg, val }; + struct spi_device *spi = lis3->bus_priv; + return spi_write(spi, tmp, sizeof(tmp)); +} + +static int lis3_spi_init(struct lis3lv02d *lis3) +{ + u8 reg; + int ret; + + /* power up the device */ + ret = lis3->read(lis3, CTRL_REG1, ®); + if (ret < 0) + return ret; + + reg |= CTRL1_PD0; + return lis3->write(lis3, CTRL_REG1, reg); +} + +static struct axis_conversion lis3lv02d_axis_normal = { 1, 2, 3 }; + +static int __devinit lis302dl_spi_probe(struct spi_device *spi) +{ + int ret; + + spi->bits_per_word = 8; + spi->mode = SPI_MODE_0; + ret = spi_setup(spi); + if (ret < 0) + return ret; + + lis3_dev.bus_priv = spi; + lis3_dev.init = lis3_spi_init; + lis3_dev.read = lis3_spi_read; + lis3_dev.write = lis3_spi_write; + lis3_dev.irq = spi->irq; + lis3_dev.ac = lis3lv02d_axis_normal; + spi_set_drvdata(spi, &lis3_dev); + + ret = lis3lv02d_init_device(&lis3_dev); + return ret; +} + +static int __devexit lis302dl_spi_remove(struct spi_device *spi) +{ + struct lis3lv02d *lis3 = spi_get_drvdata(spi); + lis3lv02d_joystick_disable(); + lis3lv02d_poweroff(lis3); + return 0; +} + +static struct spi_driver lis302dl_spi_driver = { + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + }, + .probe = lis302dl_spi_probe, + .remove = __devexit_p(lis302dl_spi_remove), +}; + +static int __init lis302dl_init(void) +{ + return spi_register_driver(&lis302dl_spi_driver); +} + +static void __exit lis302dl_exit(void) +{ + spi_unregister_driver(&lis302dl_spi_driver); +} + +module_init(lis302dl_init); +module_exit(lis302dl_exit); + +MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>"); +MODULE_DESCRIPTION("lis3lv02d SPI glue layer"); +MODULE_LICENSE("GPL"); + diff --git a/drivers/hwmon/lm95241.c b/drivers/hwmon/lm95241.c new file mode 100644 index 000000000000..091d95f38aaa --- /dev/null +++ b/drivers/hwmon/lm95241.c @@ -0,0 +1,527 @@ +/* + * lm95241.c - Part of lm_sensors, Linux kernel modules for hardware + * monitoring + * Copyright (C) 2008 Davide Rizzo <elpa-rizzo@gmail.com> + * + * Based on the max1619 driver. The LM95241 is a sensor chip made by National + * Semiconductors. + * It reports up to three temperatures (its own plus up to + * two external ones). Complete datasheet can be + * obtained from National's website at: + * http://www.national.com/ds.cgi/LM/LM95241.pdf + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/jiffies.h> +#include <linux/i2c.h> +#include <linux/hwmon.h> +#include <linux/hwmon-sysfs.h> +#include <linux/err.h> +#include <linux/mutex.h> +#include <linux/sysfs.h> + +static const unsigned short normal_i2c[] = { + 0x19, 0x2a, 0x2b, I2C_CLIENT_END}; + +/* Insmod parameters */ +I2C_CLIENT_INSMOD_1(lm95241); + +/* LM95241 registers */ +#define LM95241_REG_R_MAN_ID 0xFE +#define LM95241_REG_R_CHIP_ID 0xFF +#define LM95241_REG_R_STATUS 0x02 +#define LM95241_REG_RW_CONFIG 0x03 +#define LM95241_REG_RW_REM_FILTER 0x06 +#define LM95241_REG_RW_TRUTHERM 0x07 +#define LM95241_REG_W_ONE_SHOT 0x0F +#define LM95241_REG_R_LOCAL_TEMPH 0x10 +#define LM95241_REG_R_REMOTE1_TEMPH 0x11 +#define LM95241_REG_R_REMOTE2_TEMPH 0x12 +#define LM95241_REG_R_LOCAL_TEMPL 0x20 +#define LM95241_REG_R_REMOTE1_TEMPL 0x21 +#define LM95241_REG_R_REMOTE2_TEMPL 0x22 +#define LM95241_REG_RW_REMOTE_MODEL 0x30 + +/* LM95241 specific bitfields */ +#define CFG_STOP 0x40 +#define CFG_CR0076 0x00 +#define CFG_CR0182 0x10 +#define CFG_CR1000 0x20 +#define CFG_CR2700 0x30 +#define R1MS_SHIFT 0 +#define R2MS_SHIFT 2 +#define R1MS_MASK (0x01 << (R1MS_SHIFT)) +#define R2MS_MASK (0x01 << (R2MS_SHIFT)) +#define R1DF_SHIFT 1 +#define R2DF_SHIFT 2 +#define R1DF_MASK (0x01 << (R1DF_SHIFT)) +#define R2DF_MASK (0x01 << (R2DF_SHIFT)) +#define R1FE_MASK 0x01 +#define R2FE_MASK 0x05 +#define TT1_SHIFT 0 +#define TT2_SHIFT 4 +#define TT_OFF 0 +#define TT_ON 1 +#define TT_MASK 7 +#define MANUFACTURER_ID 0x01 +#define DEFAULT_REVISION 0xA4 + +/* Conversions and various macros */ +#define TEMP_FROM_REG(val_h, val_l) (((val_h) & 0x80 ? (val_h) - 0x100 : \ + (val_h)) * 1000 + (val_l) * 1000 / 256) + +/* Functions declaration */ +static int lm95241_attach_adapter(struct i2c_adapter *adapter); +static int lm95241_detect(struct i2c_adapter *adapter, int address, + int kind); +static void lm95241_init_client(struct i2c_client *client); +static int lm95241_detach_client(struct i2c_client *client); +static struct lm95241_data *lm95241_update_device(struct device *dev); + +/* Driver data (common to all clients) */ +static struct i2c_driver lm95241_driver = { + .driver = { + .name = "lm95241", + }, + .attach_adapter = lm95241_attach_adapter, + .detach_client = lm95241_detach_client, +}; + +/* Client data (each client gets its own) */ +struct lm95241_data { + struct i2c_client client; + struct device *hwmon_dev; + struct mutex update_lock; + unsigned long last_updated, rate; /* in jiffies */ + char valid; /* zero until following fields are valid */ + /* registers values */ + u8 local_h, local_l; /* local */ + u8 remote1_h, remote1_l; /* remote1 */ + u8 remote2_h, remote2_l; /* remote2 */ + u8 config, model, trutherm; +}; + +/* Sysfs stuff */ +#define show_temp(value) \ +static ssize_t show_##value(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct lm95241_data *data = lm95241_update_device(dev); \ + snprintf(buf, PAGE_SIZE - 1, "%d\n", \ + TEMP_FROM_REG(data->value##_h, data->value##_l)); \ + return strlen(buf); \ +} +show_temp(local); +show_temp(remote1); +show_temp(remote2); + +static ssize_t show_rate(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct lm95241_data *data = lm95241_update_device(dev); + + snprintf(buf, PAGE_SIZE - 1, "%lu\n", 1000 * data->rate / HZ); + return strlen(buf); +} + +static ssize_t set_rate(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct lm95241_data *data = i2c_get_clientdata(client); + + strict_strtol(buf, 10, &data->rate); + data->rate = data->rate * HZ / 1000; + + return count; +} + +#define show_type(flag) \ +static ssize_t show_type##flag(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct i2c_client *client = to_i2c_client(dev); \ + struct lm95241_data *data = i2c_get_clientdata(client); \ +\ + snprintf(buf, PAGE_SIZE - 1, \ + data->model & R##flag##MS_MASK ? "1\n" : "2\n"); \ + return strlen(buf); \ +} +show_type(1); +show_type(2); + +#define show_min(flag) \ +static ssize_t show_min##flag(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct i2c_client *client = to_i2c_client(dev); \ + struct lm95241_data *data = i2c_get_clientdata(client); \ +\ + snprintf(buf, PAGE_SIZE - 1, \ + data->config & R##flag##DF_MASK ? \ + "-127000\n" : "0\n"); \ + return strlen(buf); \ +} +show_min(1); +show_min(2); + +#define show_max(flag) \ +static ssize_t show_max##flag(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct i2c_client *client = to_i2c_client(dev); \ + struct lm95241_data *data = i2c_get_clientdata(client); \ +\ + snprintf(buf, PAGE_SIZE - 1, \ + data->config & R##flag##DF_MASK ? \ + "127000\n" : "255000\n"); \ + return strlen(buf); \ +} +show_max(1); +show_max(2); + +#define set_type(flag) \ +static ssize_t set_type##flag(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + struct i2c_client *client = to_i2c_client(dev); \ + struct lm95241_data *data = i2c_get_clientdata(client); \ +\ + long val; \ + strict_strtol(buf, 10, &val); \ +\ + if ((val == 1) || (val == 2)) { \ +\ + mutex_lock(&data->update_lock); \ +\ + data->trutherm &= ~(TT_MASK << TT##flag##_SHIFT); \ + if (val == 1) { \ + data->model |= R##flag##MS_MASK; \ + data->trutherm |= (TT_ON << TT##flag##_SHIFT); \ + } \ + else { \ + data->model &= ~R##flag##MS_MASK; \ + data->trutherm |= (TT_OFF << TT##flag##_SHIFT); \ + } \ +\ + data->valid = 0; \ +\ + i2c_smbus_write_byte_data(client, LM95241_REG_RW_REMOTE_MODEL, \ + data->model); \ + i2c_smbus_write_byte_data(client, LM95241_REG_RW_TRUTHERM, \ + data->trutherm); \ +\ + mutex_unlock(&data->update_lock); \ +\ + } \ + return count; \ +} +set_type(1); +set_type(2); + +#define set_min(flag) \ +static ssize_t set_min##flag(struct device *dev, \ + struct device_attribute *devattr, const char *buf, size_t count) \ +{ \ + struct i2c_client *client = to_i2c_client(dev); \ + struct lm95241_data *data = i2c_get_clientdata(client); \ +\ + long val; \ + strict_strtol(buf, 10, &val); \ +\ + mutex_lock(&data->update_lock); \ +\ + if (val < 0) \ + data->config |= R##flag##DF_MASK; \ + else \ + data->config &= ~R##flag##DF_MASK; \ +\ + data->valid = 0; \ +\ + i2c_smbus_write_byte_data(client, LM95241_REG_RW_CONFIG, \ + data->config); \ +\ + mutex_unlock(&data->update_lock); \ +\ + return count; \ +} +set_min(1); +set_min(2); + +#define set_max(flag) \ +static ssize_t set_max##flag(struct device *dev, \ + struct device_attribute *devattr, const char *buf, size_t count) \ +{ \ + struct i2c_client *client = to_i2c_client(dev); \ + struct lm95241_data *data = i2c_get_clientdata(client); \ +\ + long val; \ + strict_strtol(buf, 10, &val); \ +\ + mutex_lock(&data->update_lock); \ +\ + if (val <= 127000) \ + data->config |= R##flag##DF_MASK; \ + else \ + data->config &= ~R##flag##DF_MASK; \ +\ + data->valid = 0; \ +\ + i2c_smbus_write_byte_data(client, LM95241_REG_RW_CONFIG, \ + data->config); \ +\ + mutex_unlock(&data->update_lock); \ +\ + return count; \ +} +set_max(1); +set_max(2); + +static DEVICE_ATTR(temp1_input, S_IRUGO, show_local, NULL); +static DEVICE_ATTR(temp2_input, S_IRUGO, show_remote1, NULL); +static DEVICE_ATTR(temp3_input, S_IRUGO, show_remote2, NULL); +static DEVICE_ATTR(temp2_type, S_IWUSR | S_IRUGO, show_type1, set_type1); +static DEVICE_ATTR(temp3_type, S_IWUSR | S_IRUGO, show_type2, set_type2); +static DEVICE_ATTR(temp2_min, S_IWUSR | S_IRUGO, show_min1, set_min1); +static DEVICE_ATTR(temp3_min, S_IWUSR | S_IRUGO, show_min2, set_min2); +static DEVICE_ATTR(temp2_max, S_IWUSR | S_IRUGO, show_max1, set_max1); +static DEVICE_ATTR(temp3_max, S_IWUSR | S_IRUGO, show_max2, set_max2); +static DEVICE_ATTR(rate, S_IWUSR | S_IRUGO, show_rate, set_rate); + +static struct attribute *lm95241_attributes[] = { + &dev_attr_temp1_input.attr, + &dev_attr_temp2_input.attr, + &dev_attr_temp3_input.attr, + &dev_attr_temp2_type.attr, + &dev_attr_temp3_type.attr, + &dev_attr_temp2_min.attr, + &dev_attr_temp3_min.attr, + &dev_attr_temp2_max.attr, + &dev_attr_temp3_max.attr, + &dev_attr_rate.attr, + NULL +}; + +static const struct attribute_group lm95241_group = { + .attrs = lm95241_attributes, +}; + +/* Init/exit code */ +static int lm95241_attach_adapter(struct i2c_adapter *adapter) +{ + if (!(adapter->class & I2C_CLASS_HWMON)) + return 0; + return i2c_probe(adapter, &addr_data, lm95241_detect); +} + +/* + * The following function does more than just detection. If detection + * succeeds, it also registers the new chip. + */ +static int lm95241_detect(struct i2c_adapter *adapter, int address, int kind) +{ + struct i2c_client *new_client; + struct lm95241_data *data; + int err = 0; + const char *name = ""; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + goto exit; + + data = kzalloc(sizeof(struct lm95241_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + /* The common I2C client data is placed right before the + LM95241-specific data. */ + new_client = &data->client; + i2c_set_clientdata(new_client, data); + new_client->addr = address; + new_client->adapter = adapter; + new_client->driver = &lm95241_driver; + new_client->flags = 0; + + /* + * Now we do the remaining detection. A negative kind means that + * the driver was loaded with no force parameter (default), so we + * must both detect and identify the chip. A zero kind means that + * the driver was loaded with the force parameter, the detection + * step shall be skipped. A positive kind means that the driver + * was loaded with the force parameter and a given kind of chip is + * requested, so both the detection and the identification steps + * are skipped. + */ + if (kind < 0) { /* detection */ + if ((i2c_smbus_read_byte_data(new_client, LM95241_REG_R_MAN_ID) + != MANUFACTURER_ID) + || (i2c_smbus_read_byte_data(new_client, LM95241_REG_R_CHIP_ID) + < DEFAULT_REVISION)) { + dev_dbg(&adapter->dev, + "LM95241 detection failed at 0x%02x.\n", + address); + goto exit_free; + } + } + + if (kind <= 0) { /* identification */ + if ((i2c_smbus_read_byte_data(new_client, LM95241_REG_R_MAN_ID) + == MANUFACTURER_ID) + && (i2c_smbus_read_byte_data(new_client, LM95241_REG_R_CHIP_ID) + >= DEFAULT_REVISION)) { + + kind = lm95241; + + if (kind <= 0) { /* identification failed */ + dev_info(&adapter->dev, "Unsupported chip\n"); + goto exit_free; + } + } + } + + if (kind == lm95241) + name = "lm95241"; + + /* We can fill in the remaining client fields */ + strlcpy(new_client->name, name, I2C_NAME_SIZE); + data->valid = 0; + mutex_init(&data->update_lock); + + /* Tell the I2C layer a new client has arrived */ + err = i2c_attach_client(new_client); + if (err) + goto exit_free; + + /* Initialize the LM95241 chip */ + lm95241_init_client(new_client); + + /* Register sysfs hooks */ + err = sysfs_create_group(&new_client->dev.kobj, &lm95241_group); + if (err) + goto exit_detach; + + data->hwmon_dev = hwmon_device_register(&new_client->dev); + if (IS_ERR(data->hwmon_dev)) { + err = PTR_ERR(data->hwmon_dev); + goto exit_remove_files; + } + + return 0; + +exit_remove_files: + sysfs_remove_group(&new_client->dev.kobj, &lm95241_group); +exit_detach: + i2c_detach_client(new_client); +exit_free: + kfree(data); +exit: + return err; +} + +static void lm95241_init_client(struct i2c_client *client) +{ + struct lm95241_data *data = i2c_get_clientdata(client); + + data->rate = HZ; /* 1 sec default */ + data->valid = 0; + data->config = CFG_CR0076; + data->model = 0; + data->trutherm = (TT_OFF << TT1_SHIFT) | (TT_OFF << TT2_SHIFT); + + i2c_smbus_write_byte_data(client, LM95241_REG_RW_CONFIG, + data->config); + i2c_smbus_write_byte_data(client, LM95241_REG_RW_REM_FILTER, + R1FE_MASK | R2FE_MASK); + i2c_smbus_write_byte_data(client, LM95241_REG_RW_TRUTHERM, + data->trutherm); + i2c_smbus_write_byte_data(client, LM95241_REG_RW_REMOTE_MODEL, + data->model); +} + +static int lm95241_detach_client(struct i2c_client *client) +{ + struct lm95241_data *data = i2c_get_clientdata(client); + int err; + + hwmon_device_unregister(data->hwmon_dev); + sysfs_remove_group(&client->dev.kobj, &lm95241_group); + + err = i2c_detach_client(client); + if (err) + return err; + + kfree(data); + return 0; +} + +static struct lm95241_data *lm95241_update_device(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct lm95241_data *data = i2c_get_clientdata(client); + + mutex_lock(&data->update_lock); + + if (time_after(jiffies, data->last_updated + data->rate) || + !data->valid) { + dev_dbg(&client->dev, "Updating lm95241 data.\n"); + data->local_h = + i2c_smbus_read_byte_data(client, + LM95241_REG_R_LOCAL_TEMPH); + data->local_l = + i2c_smbus_read_byte_data(client, + LM95241_REG_R_LOCAL_TEMPL); + data->remote1_h = + i2c_smbus_read_byte_data(client, + LM95241_REG_R_REMOTE1_TEMPH); + data->remote1_l = + i2c_smbus_read_byte_data(client, + LM95241_REG_R_REMOTE1_TEMPL); + data->remote2_h = + i2c_smbus_read_byte_data(client, + LM95241_REG_R_REMOTE2_TEMPH); + data->remote2_l = + i2c_smbus_read_byte_data(client, + LM95241_REG_R_REMOTE2_TEMPL); + data->last_updated = jiffies; + data->valid = 1; + } + + mutex_unlock(&data->update_lock); + + return data; +} + +static int __init sensors_lm95241_init(void) +{ + return i2c_add_driver(&lm95241_driver); +} + +static void __exit sensors_lm95241_exit(void) +{ + i2c_del_driver(&lm95241_driver); +} + +MODULE_AUTHOR("Davide Rizzo <elpa-rizzo@gmail.com>"); +MODULE_DESCRIPTION("LM95241 sensor driver"); +MODULE_LICENSE("GPL"); + +module_init(sensors_lm95241_init); +module_exit(sensors_lm95241_exit); diff --git a/drivers/hwmon/ltc4215.c b/drivers/hwmon/ltc4215.c new file mode 100644 index 000000000000..9386e2a39211 --- /dev/null +++ b/drivers/hwmon/ltc4215.c @@ -0,0 +1,364 @@ +/* + * Driver for Linear Technology LTC4215 I2C Hot Swap Controller + * + * Copyright (C) 2009 Ira W. Snyder <iws@ovro.caltech.edu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * Datasheet: + * http://www.linear.com/pc/downloadDocument.do?navId=H0,C1,C1003,C1006,C1163,P17572,D12697 + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/hwmon.h> +#include <linux/hwmon-sysfs.h> + +static const unsigned short normal_i2c[] = { I2C_CLIENT_END }; + +/* Insmod parameters */ +I2C_CLIENT_INSMOD_1(ltc4215); + +/* Here are names of the chip's registers (a.k.a. commands) */ +enum ltc4215_cmd { + LTC4215_CONTROL = 0x00, /* rw */ + LTC4215_ALERT = 0x01, /* rw */ + LTC4215_STATUS = 0x02, /* ro */ + LTC4215_FAULT = 0x03, /* rw */ + LTC4215_SENSE = 0x04, /* rw */ + LTC4215_SOURCE = 0x05, /* rw */ + LTC4215_ADIN = 0x06, /* rw */ +}; + +struct ltc4215_data { + struct device *hwmon_dev; + + struct mutex update_lock; + bool valid; + unsigned long last_updated; /* in jiffies */ + + /* Registers */ + u8 regs[7]; +}; + +static struct ltc4215_data *ltc4215_update_device(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ltc4215_data *data = i2c_get_clientdata(client); + s32 val; + int i; + + mutex_lock(&data->update_lock); + + /* The chip's A/D updates 10 times per second */ + if (time_after(jiffies, data->last_updated + HZ / 10) || !data->valid) { + + dev_dbg(&client->dev, "Starting ltc4215 update\n"); + + /* Read all registers */ + for (i = 0; i < ARRAY_SIZE(data->regs); i++) { + val = i2c_smbus_read_byte_data(client, i); + if (unlikely(val < 0)) + data->regs[i] = 0; + else + data->regs[i] = val; + } + + data->last_updated = jiffies; + data->valid = 1; + } + + mutex_unlock(&data->update_lock); + + return data; +} + +/* Return the voltage from the given register in millivolts */ +static int ltc4215_get_voltage(struct device *dev, u8 reg) +{ + struct ltc4215_data *data = ltc4215_update_device(dev); + const u8 regval = data->regs[reg]; + u32 voltage = 0; + + switch (reg) { + case LTC4215_SENSE: + /* 151 uV per increment */ + voltage = regval * 151 / 1000; + break; + case LTC4215_SOURCE: + /* 60.5 mV per increment */ + voltage = regval * 605 / 10; + break; + case LTC4215_ADIN: + /* The ADIN input is divided by 12.5, and has 4.82 mV + * per increment, so we have the additional multiply */ + voltage = regval * 482 * 125 / 1000; + break; + default: + /* If we get here, the developer messed up */ + WARN_ON_ONCE(1); + break; + } + + return voltage; +} + +/* Return the current from the sense resistor in mA */ +static unsigned int ltc4215_get_current(struct device *dev) +{ + struct ltc4215_data *data = ltc4215_update_device(dev); + + /* The strange looking conversions that follow are fixed-point + * math, since we cannot do floating point in the kernel. + * + * Step 1: convert sense register to microVolts + * Step 2: convert voltage to milliAmperes + * + * If you play around with the V=IR equation, you come up with + * the following: X uV / Y mOhm == Z mA + * + * With the resistors that are fractions of a milliOhm, we multiply + * the voltage and resistance by 10, to shift the decimal point. + * Now we can use the normal division operator again. + */ + + /* Calculate voltage in microVolts (151 uV per increment) */ + const unsigned int voltage = data->regs[LTC4215_SENSE] * 151; + + /* Calculate current in milliAmperes (4 milliOhm sense resistor) */ + const unsigned int curr = voltage / 4; + + return curr; +} + +static ssize_t ltc4215_show_voltage(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct sensor_device_attribute *attr = to_sensor_dev_attr(da); + const int voltage = ltc4215_get_voltage(dev, attr->index); + + return snprintf(buf, PAGE_SIZE, "%d\n", voltage); +} + +static ssize_t ltc4215_show_current(struct device *dev, + struct device_attribute *da, + char *buf) +{ + const unsigned int curr = ltc4215_get_current(dev); + + return snprintf(buf, PAGE_SIZE, "%u\n", curr); +} + +static ssize_t ltc4215_show_power(struct device *dev, + struct device_attribute *da, + char *buf) +{ + const unsigned int curr = ltc4215_get_current(dev); + const int output_voltage = ltc4215_get_voltage(dev, LTC4215_ADIN); + + /* current in mA * voltage in mV == power in uW */ + const unsigned int power = abs(output_voltage * curr); + + return snprintf(buf, PAGE_SIZE, "%u\n", power); +} + +static ssize_t ltc4215_show_alarm(struct device *dev, + struct device_attribute *da, + char *buf) +{ + struct sensor_device_attribute_2 *attr = to_sensor_dev_attr_2(da); + struct ltc4215_data *data = ltc4215_update_device(dev); + const u8 reg = data->regs[attr->index]; + const u32 mask = attr->nr; + + return snprintf(buf, PAGE_SIZE, "%u\n", (reg & mask) ? 1 : 0); +} + +/* These macros are used below in constructing device attribute objects + * for use with sysfs_create_group() to make a sysfs device file + * for each register. + */ + +#define LTC4215_VOLTAGE(name, ltc4215_cmd_idx) \ + static SENSOR_DEVICE_ATTR(name, S_IRUGO, \ + ltc4215_show_voltage, NULL, ltc4215_cmd_idx) + +#define LTC4215_CURRENT(name) \ + static SENSOR_DEVICE_ATTR(name, S_IRUGO, \ + ltc4215_show_current, NULL, 0); + +#define LTC4215_POWER(name) \ + static SENSOR_DEVICE_ATTR(name, S_IRUGO, \ + ltc4215_show_power, NULL, 0); + +#define LTC4215_ALARM(name, mask, reg) \ + static SENSOR_DEVICE_ATTR_2(name, S_IRUGO, \ + ltc4215_show_alarm, NULL, (mask), reg) + +/* Construct a sensor_device_attribute structure for each register */ + +/* Current */ +LTC4215_CURRENT(curr1_input); +LTC4215_ALARM(curr1_max_alarm, (1 << 2), LTC4215_STATUS); + +/* Power (virtual) */ +LTC4215_POWER(power1_input); +LTC4215_ALARM(power1_alarm, (1 << 3), LTC4215_STATUS); + +/* Input Voltage */ +LTC4215_VOLTAGE(in1_input, LTC4215_ADIN); +LTC4215_ALARM(in1_max_alarm, (1 << 0), LTC4215_STATUS); +LTC4215_ALARM(in1_min_alarm, (1 << 1), LTC4215_STATUS); + +/* Output Voltage */ +LTC4215_VOLTAGE(in2_input, LTC4215_SOURCE); + +/* Finally, construct an array of pointers to members of the above objects, + * as required for sysfs_create_group() + */ +static struct attribute *ltc4215_attributes[] = { + &sensor_dev_attr_curr1_input.dev_attr.attr, + &sensor_dev_attr_curr1_max_alarm.dev_attr.attr, + + &sensor_dev_attr_power1_input.dev_attr.attr, + &sensor_dev_attr_power1_alarm.dev_attr.attr, + + &sensor_dev_attr_in1_input.dev_attr.attr, + &sensor_dev_attr_in1_max_alarm.dev_attr.attr, + &sensor_dev_attr_in1_min_alarm.dev_attr.attr, + + &sensor_dev_attr_in2_input.dev_attr.attr, + + NULL, +}; + +static const struct attribute_group ltc4215_group = { + .attrs = ltc4215_attributes, +}; + +static int ltc4215_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct ltc4215_data *data; + int ret; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto out_kzalloc; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); + + /* Initialize the LTC4215 chip */ + /* TODO */ + + /* Register sysfs hooks */ + ret = sysfs_create_group(&client->dev.kobj, <c4215_group); + if (ret) + goto out_sysfs_create_group; + + data->hwmon_dev = hwmon_device_register(&client->dev); + if (IS_ERR(data->hwmon_dev)) { + ret = PTR_ERR(data->hwmon_dev); + goto out_hwmon_device_register; + } + + return 0; + +out_hwmon_device_register: + sysfs_remove_group(&client->dev.kobj, <c4215_group); +out_sysfs_create_group: + kfree(data); +out_kzalloc: + return ret; +} + +static int ltc4215_remove(struct i2c_client *client) +{ + struct ltc4215_data *data = i2c_get_clientdata(client); + + hwmon_device_unregister(data->hwmon_dev); + sysfs_remove_group(&client->dev.kobj, <c4215_group); + + kfree(data); + + return 0; +} + +static int ltc4215_detect(struct i2c_client *client, + int kind, + struct i2c_board_info *info) +{ + struct i2c_adapter *adapter = client->adapter; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + return -ENODEV; + + if (kind < 0) { /* probed detection - check the chip type */ + s32 v; /* 8 bits from the chip, or -ERRNO */ + + /* + * Register 0x01 bit b7 is reserved, expect 0 + * Register 0x03 bit b6 and b7 are reserved, expect 0 + */ + v = i2c_smbus_read_byte_data(client, LTC4215_ALERT); + if (v < 0 || (v & (1 << 7)) != 0) + return -ENODEV; + + v = i2c_smbus_read_byte_data(client, LTC4215_FAULT); + if (v < 0 || (v & ((1 << 6) | (1 << 7))) != 0) + return -ENODEV; + } + + strlcpy(info->type, "ltc4215", I2C_NAME_SIZE); + dev_info(&adapter->dev, "ltc4215 %s at address 0x%02x\n", + kind < 0 ? "probed" : "forced", + client->addr); + + return 0; +} + +static const struct i2c_device_id ltc4215_id[] = { + { "ltc4215", ltc4215 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ltc4215_id); + +/* This is the driver that will be inserted */ +static struct i2c_driver ltc4215_driver = { + .class = I2C_CLASS_HWMON, + .driver = { + .name = "ltc4215", + }, + .probe = ltc4215_probe, + .remove = ltc4215_remove, + .id_table = ltc4215_id, + .detect = ltc4215_detect, + .address_data = &addr_data, +}; + +static int __init ltc4215_init(void) +{ + return i2c_add_driver(<c4215_driver); +} + +static void __exit ltc4215_exit(void) +{ + i2c_del_driver(<c4215_driver); +} + +MODULE_AUTHOR("Ira W. Snyder <iws@ovro.caltech.edu>"); +MODULE_DESCRIPTION("LTC4215 driver"); +MODULE_LICENSE("GPL"); + +module_init(ltc4215_init); +module_exit(ltc4215_exit); diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 640c99207242..cf06494bb744 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -222,7 +222,8 @@ comment "IDE chipset support/bugfixes" config IDE_GENERIC tristate "generic/default IDE chipset support" - depends on ALPHA || X86 || IA64 || M32R || MIPS + depends on ALPHA || X86 || IA64 || M32R || MIPS || ARCH_RPC || ARCH_SHARK + default ARM && (ARCH_RPC || ARCH_SHARK) help This is the generic IDE driver. This driver attaches to the fixed legacy ports (e.g. on PCs 0x1f0/0x170, 0x1e8/0x168 and @@ -680,7 +681,7 @@ endif # TODO: BLK_DEV_IDEDMA_PCI -> BLK_DEV_IDEDMA_SFF config BLK_DEV_IDE_PMAC tristate "PowerMac on-board IDE support" - depends on PPC_PMAC && IDE=y + depends on PPC_PMAC select IDE_TIMINGS select BLK_DEV_IDEDMA_PCI help @@ -731,11 +732,6 @@ config BLK_DEV_IDE_AT91 depends on ARM && ARCH_AT91 && !ARCH_AT91RM9200 && !ARCH_AT91X40 select IDE_TIMINGS -config IDE_ARM - tristate "ARM IDE support" - depends on ARM && (ARCH_RPC || ARCH_SHARK) - default y - config BLK_DEV_IDE_ICSIDE tristate "ICS IDE interface support" depends on ARM && ARCH_ACORN @@ -774,27 +770,20 @@ config BLK_DEV_GAYLE This includes on-board IDE interfaces on some Amiga models (A600, A1200, A4000, and A4000T), and IDE interfaces on the Zorro expansion bus (M-Tech E-Matrix 530 expansion card). - Say Y if you have an Amiga with a Gayle IDE interface and want to use - IDE devices (hard disks, CD-ROM drives, etc.) that are connected to - it. - Note that you also have to enable Zorro bus support if you want to - use Gayle IDE interfaces on the Zorro expansion bus. -config BLK_DEV_IDEDOUBLER - bool "Amiga IDE Doubler support (EXPERIMENTAL)" - depends on BLK_DEV_GAYLE && EXPERIMENTAL - ---help--- - This feature provides support for the so-called `IDE doublers' (made + It also provides support for the so-called `IDE doublers' (made by various manufacturers, e.g. Eyetech) that can be connected to the on-board IDE interface of some Amiga models. Using such an IDE doubler, you can connect up to four instead of two IDE devices to - the Amiga's on-board IDE interface. + the Amiga's on-board IDE interface. The feature is enabled at kernel + runtime using the "gayle.doubler" kernel boot parameter. - Note that the normal Amiga Gayle IDE driver may not work correctly - if you have an IDE doubler and don't enable this feature! + Say Y if you have an Amiga with a Gayle IDE interface and want to use + IDE devices (hard disks, CD-ROM drives, etc.) that are connected to + it. - Say Y if you have an IDE doubler. The feature is enabled at kernel - runtime using the "gayle.doubler" kernel boot parameter. + Note that you also have to enable Zorro bus support if you want to + use Gayle IDE interfaces on the Zorro expansion bus. config BLK_DEV_BUDDHA tristate "Buddha/Catweasel/X-Surf IDE interface support (EXPERIMENTAL)" diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile index 9b4bbe1cdc1a..81df925f0e8b 100644 --- a/drivers/ide/Makefile +++ b/drivers/ide/Makefile @@ -21,8 +21,6 @@ ide-core-$(CONFIG_IDE_LEGACY) += ide-legacy.o obj-$(CONFIG_IDE) += ide-core.o -obj-$(CONFIG_IDE_ARM) += ide_arm.o - obj-$(CONFIG_BLK_DEV_ALI14XX) += ali14xx.o obj-$(CONFIG_BLK_DEV_UMC8672) += umc8672.o obj-$(CONFIG_BLK_DEV_DTC2278) += dtc2278.o diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index d516168464fc..537da1cde16d 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -189,20 +189,20 @@ static void ali_set_dma_mode(ide_drive_t *drive, const u8 speed) } /** - * ali15x3_dma_setup - begin a DMA phase + * ali_dma_check - DMA check * @drive: target device * @cmd: command * * Returns 1 if the DMA cannot be performed, zero on success. */ -static int ali15x3_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) +static int ali_dma_check(ide_drive_t *drive, struct ide_cmd *cmd) { if (m5229_revision < 0xC2 && drive->media != ide_disk) { if (cmd->tf_flags & IDE_TFLAG_WRITE) return 1; /* try PIO instead of DMA */ } - return ide_dma_setup(drive, cmd); + return 0; } /** @@ -503,13 +503,13 @@ static const struct ide_port_ops ali_port_ops = { static const struct ide_dma_ops ali_dma_ops = { .dma_host_set = ide_dma_host_set, - .dma_setup = ali15x3_dma_setup, + .dma_setup = ide_dma_setup, .dma_start = ide_dma_start, .dma_end = ide_dma_end, .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, + .dma_check = ali_dma_check, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c index 27547121daff..8eda552326e9 100644 --- a/drivers/ide/at91_ide.c +++ b/drivers/ide/at91_ide.c @@ -192,15 +192,9 @@ static void at91_ide_tf_load(ide_drive_t *drive, struct ide_cmd *cmd) struct ide_taskfile *tf = &cmd->tf; u8 HIHI = (cmd->tf_flags & IDE_TFLAG_LBA48) ? 0xE0 : 0xEF; - if (cmd->tf_flags & IDE_FTFLAG_FLAGGED) + if (cmd->ftf_flags & IDE_FTFLAG_FLAGGED) HIHI = 0xFF; - if (cmd->tf_flags & IDE_FTFLAG_OUT_DATA) { - u16 data = (tf->hob_data << 8) | tf->data; - - at91_ide_output_data(drive, NULL, &data, 2); - } - if (cmd->tf_flags & IDE_TFLAG_OUT_HOB_FEATURE) ide_mm_outb(tf->hob_feature, io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_OUT_HOB_NSECT) @@ -233,19 +227,11 @@ static void at91_ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) struct ide_io_ports *io_ports = &hwif->io_ports; struct ide_taskfile *tf = &cmd->tf; - if (cmd->tf_flags & IDE_FTFLAG_IN_DATA) { - u16 data; - - at91_ide_input_data(drive, NULL, &data, 2); - tf->data = data & 0xff; - tf->hob_data = (data >> 8) & 0xff; - } - /* be sure we're looking at the low order bits */ - ide_mm_outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr); + ide_mm_outb(ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_FEATURE) - tf->feature = ide_mm_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_ERROR) + tf->error = ide_mm_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_NSECT) tf->nsect = ide_mm_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_LBAL) @@ -258,18 +244,18 @@ static void at91_ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) tf->device = ide_mm_inb(io_ports->device_addr); if (cmd->tf_flags & IDE_TFLAG_LBA48) { - ide_mm_outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr); + ide_mm_outb(ATA_HOB | ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) - tf->hob_feature = ide_mm_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_HOB_ERROR) + tf->hob_error = ide_mm_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_NSECT) - tf->hob_nsect = ide_mm_inb(io_ports->nsect_addr); + tf->hob_nsect = ide_mm_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAL) - tf->hob_lbal = ide_mm_inb(io_ports->lbal_addr); + tf->hob_lbal = ide_mm_inb(io_ports->lbal_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAM) - tf->hob_lbam = ide_mm_inb(io_ports->lbam_addr); + tf->hob_lbam = ide_mm_inb(io_ports->lbam_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAH) - tf->hob_lbah = ide_mm_inb(io_ports->lbah_addr); + tf->hob_lbah = ide_mm_inb(io_ports->lbah_addr); } } @@ -295,8 +281,9 @@ static const struct ide_tp_ops at91_ide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - .set_irq = ide_set_irq, + .write_devctl = ide_write_devctl, + .dev_select = ide_dev_select, .tf_load = at91_ide_tf_load, .tf_read = at91_ide_tf_read, diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c index d3a9d6c15328..46013644c965 100644 --- a/drivers/ide/au1xxx-ide.c +++ b/drivers/ide/au1xxx-ide.c @@ -50,7 +50,7 @@ static _auide_hwif auide_hwif; #if defined(CONFIG_BLK_DEV_IDE_AU1XXX_PIO_DBDMA) -void auide_insw(unsigned long port, void *addr, u32 count) +static inline void auide_insw(unsigned long port, void *addr, u32 count) { _auide_hwif *ahwif = &auide_hwif; chan_tab_t *ctp; @@ -68,7 +68,7 @@ void auide_insw(unsigned long port, void *addr, u32 count) ctp->cur_ptr = au1xxx_ddma_get_nextptr_virt(dp); } -void auide_outsw(unsigned long port, void *addr, u32 count) +static inline void auide_outsw(unsigned long port, void *addr, u32 count) { _auide_hwif *ahwif = &auide_hwif; chan_tab_t *ctp; @@ -236,7 +236,7 @@ static int auide_build_dmatable(ide_drive_t *drive, struct ide_cmd *cmd) if (++count >= PRD_ENTRIES) { printk(KERN_WARNING "%s: DMA table too small\n", drive->name); - goto use_pio_instead; + return 0; } /* Lets enable intr for the last descriptor only */ @@ -272,16 +272,11 @@ static int auide_build_dmatable(ide_drive_t *drive, struct ide_cmd *cmd) if (count) return 1; - use_pio_instead: - ide_destroy_dmatable(drive); - return 0; /* revert to PIO for this request */ } static int auide_dma_end(ide_drive_t *drive) { - ide_destroy_dmatable(drive); - return 0; } @@ -292,12 +287,9 @@ static void auide_dma_start(ide_drive_t *drive ) static int auide_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) { - if (auide_build_dmatable(drive, cmd) == 0) { - ide_map_sg(drive, cmd); + if (auide_build_dmatable(drive, cmd) == 0) return 1; - } - drive->waiting_for_dma = 1; return 0; } @@ -322,16 +314,11 @@ static void auide_dma_host_set(ide_drive_t *drive, int on) static void auide_ddma_tx_callback(int irq, void *param) { - _auide_hwif *ahwif = (_auide_hwif*)param; - ahwif->drive->waiting_for_dma = 0; } static void auide_ddma_rx_callback(int irq, void *param) { - _auide_hwif *ahwif = (_auide_hwif*)param; - ahwif->drive->waiting_for_dma = 0; } - #endif /* end CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA */ static void auide_init_dbdma_dev(dbdev_tab_t *dev, u32 dev_id, u32 tsize, u32 devwidth, u32 flags) @@ -353,7 +340,6 @@ static const struct ide_dma_ops au1xxx_dma_ops = { .dma_end = auide_dma_end, .dma_test_irq = auide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, - .dma_timeout = ide_dma_timeout, }; static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d) @@ -481,9 +467,9 @@ static const struct ide_tp_ops au1xxx_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, - .set_irq = ide_set_irq, - + .dev_select = ide_dev_select, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c index bf0e3f470824..80b777e4247b 100644 --- a/drivers/ide/cmd64x.c +++ b/drivers/ide/cmd64x.c @@ -318,7 +318,6 @@ static int cmd646_1_dma_end(ide_drive_t *drive) ide_hwif_t *hwif = drive->hwif; u8 dma_stat = 0, dma_cmd = 0; - drive->waiting_for_dma = 0; /* get DMA status */ dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS); /* read DMA command state */ @@ -327,8 +326,6 @@ static int cmd646_1_dma_end(ide_drive_t *drive) outb(dma_cmd & ~1, hwif->dma_base + ATA_DMA_CMD); /* clear the INTR & ERROR bits */ outb(dma_stat | 6, hwif->dma_base + ATA_DMA_STATUS); - /* and free any DMA resources */ - ide_destroy_dmatable(drive); /* verify good DMA status */ return (dma_stat & 7) != 4; } @@ -384,7 +381,6 @@ static const struct ide_dma_ops cmd64x_dma_ops = { .dma_test_irq = cmd64x_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; @@ -396,7 +392,6 @@ static const struct ide_dma_ops cmd646_rev1_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; @@ -408,7 +403,6 @@ static const struct ide_dma_ops cmd648_dma_ops = { .dma_test_irq = cmd648_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/cs5530.c b/drivers/ide/cs5530.c index 8e8b35a89901..40bf05eddf6e 100644 --- a/drivers/ide/cs5530.c +++ b/drivers/ide/cs5530.c @@ -92,8 +92,7 @@ static u8 cs5530_udma_filter(ide_drive_t *drive) if ((mateid[ATA_ID_FIELD_VALID] & 4) && (mateid[ATA_ID_UDMA_MODES] & 7)) goto out; - if ((mateid[ATA_ID_FIELD_VALID] & 2) && - (mateid[ATA_ID_MWDMA_MODES] & 7)) + if (mateid[ATA_ID_MWDMA_MODES] & 7) mask = 0; } out: diff --git a/drivers/ide/cs5536.c b/drivers/ide/cs5536.c index d5dcf4899607..353a35bbba63 100644 --- a/drivers/ide/cs5536.c +++ b/drivers/ide/cs5536.c @@ -236,7 +236,6 @@ static const struct ide_dma_ops cs5536_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, }; static const struct ide_port_info cs5536_info = { diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c index b368a5effc3a..afa2af9a362b 100644 --- a/drivers/ide/falconide.c +++ b/drivers/ide/falconide.c @@ -89,9 +89,9 @@ static const struct ide_tp_ops falconide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, - .set_irq = ide_set_irq, - + .dev_select = ide_dev_select, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c index dc778251cb05..c7119516c5a7 100644 --- a/drivers/ide/gayle.c +++ b/drivers/ide/gayle.c @@ -53,11 +53,6 @@ #define GAYLE_NEXT_PORT 0x1000 -#ifndef CONFIG_BLK_DEV_IDEDOUBLER -#define GAYLE_NUM_HWIFS 1 -#define GAYLE_NUM_PROBE_HWIFS GAYLE_NUM_HWIFS -#define GAYLE_HAS_CONTROL_REG 1 -#else /* CONFIG_BLK_DEV_IDEDOUBLER */ #define GAYLE_NUM_HWIFS 2 #define GAYLE_NUM_PROBE_HWIFS (ide_doubler ? GAYLE_NUM_HWIFS : \ GAYLE_NUM_HWIFS-1) @@ -66,8 +61,6 @@ static int ide_doubler; module_param_named(doubler, ide_doubler, bool, 0); MODULE_PARM_DESC(doubler, "enable support for IDE doublers"); -#endif /* CONFIG_BLK_DEV_IDEDOUBLER */ - /* * Check and acknowledge the interrupt status @@ -151,10 +144,7 @@ static int __init gayle_init(void) found: printk(KERN_INFO "ide: Gayle IDE controller (A%d style%s)\n", a4000 ? 4000 : 1200, -#ifdef CONFIG_BLK_DEV_IDEDOUBLER - ide_doubler ? ", IDE doubler" : -#endif - ""); + ide_doubler ? ", IDE doubler" : ""); if (a4000) { phys_base = GAYLE_BASE_4000; diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c index dbaf184ed9c5..a0eb87f59134 100644 --- a/drivers/ide/hpt366.c +++ b/drivers/ide/hpt366.c @@ -835,12 +835,6 @@ static int hpt370_dma_end(ide_drive_t *drive) return ide_dma_end(drive); } -static void hpt370_dma_timeout(ide_drive_t *drive) -{ - hpt370_irq_timeout(drive); - ide_dma_timeout(drive); -} - /* returns 1 if DMA IRQ issued, 0 otherwise */ static int hpt374_dma_test_irq(ide_drive_t *drive) { @@ -1423,7 +1417,6 @@ static const struct ide_dma_ops hpt37x_dma_ops = { .dma_test_irq = hpt374_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; @@ -1435,7 +1428,7 @@ static const struct ide_dma_ops hpt370_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = hpt370_dma_timeout, + .dma_clear = hpt370_irq_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; @@ -1447,7 +1440,6 @@ static const struct ide_dma_ops hpt36x_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = hpt366_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/ht6560b.c b/drivers/ide/ht6560b.c index c7e5c2246b79..2fb0f2965009 100644 --- a/drivers/ide/ht6560b.c +++ b/drivers/ide/ht6560b.c @@ -103,7 +103,7 @@ /* * This routine is invoked from ide.c to prepare for access to a given drive. */ -static void ht6560b_selectproc (ide_drive_t *drive) +static void ht6560b_dev_select(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; unsigned long flags; @@ -143,6 +143,8 @@ static void ht6560b_selectproc (ide_drive_t *drive) #endif } local_irq_restore(flags); + + outb(drive->select | ATA_DEVICE_OBS, hwif->io_ports.device_addr); } /* @@ -305,15 +307,29 @@ static int probe_ht6560b; module_param_named(probe, probe_ht6560b, bool, 0); MODULE_PARM_DESC(probe, "probe for HT6560B chipset"); +static const struct ide_tp_ops ht6560b_tp_ops = { + .exec_command = ide_exec_command, + .read_status = ide_read_status, + .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, + + .dev_select = ht6560b_dev_select, + .tf_load = ide_tf_load, + .tf_read = ide_tf_read, + + .input_data = ide_input_data, + .output_data = ide_output_data, +}; + static const struct ide_port_ops ht6560b_port_ops = { .init_dev = ht6560b_init_dev, .set_pio_mode = ht6560b_set_pio_mode, - .selectproc = ht6560b_selectproc, }; static const struct ide_port_info ht6560b_port_info __initdata = { .name = DRV_NAME, .chipset = ide_ht6560b, + .tp_ops = &ht6560b_tp_ops, .port_ops = &ht6560b_port_ops, .host_flags = IDE_HFLAG_SERIALIZE | /* is this needed? */ IDE_HFLAG_NO_DMA | diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c index 51ce404fe532..4e16ce68b063 100644 --- a/drivers/ide/icside.c +++ b/drivers/ide/icside.c @@ -287,13 +287,8 @@ static int icside_dma_end(ide_drive_t *drive) ide_hwif_t *hwif = drive->hwif; struct expansion_card *ec = ECARD_DEV(hwif->dev); - drive->waiting_for_dma = 0; - disable_dma(ec->dma); - /* Teardown mappings after DMA has completed. */ - ide_destroy_dmatable(drive); - return get_dma_residue(ec->dma) != 0; } @@ -346,8 +341,6 @@ static int icside_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) set_dma_sg(ec->dma, hwif->sg_table, cmd->sg_nents); set_dma_mode(ec->dma, dma_mode); - drive->waiting_for_dma = 1; - return 0; } @@ -377,7 +370,6 @@ static const struct ide_dma_ops icside_v6_dma_ops = { .dma_start = icside_dma_start, .dma_end = icside_dma_end, .dma_test_irq = icside_dma_test_irq, - .dma_timeout = ide_dma_timeout, .dma_lost_irq = ide_dma_lost_irq, }; #else diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 2fb5d28a9be5..3e43b889dd64 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -6,6 +6,8 @@ #include <linux/cdrom.h> #include <linux/delay.h> #include <linux/ide.h> +#include <linux/scatterlist.h> + #include <scsi/scsi.h> #ifdef DEBUG @@ -69,56 +71,6 @@ int ide_check_atapi_device(ide_drive_t *drive, const char *s) } EXPORT_SYMBOL_GPL(ide_check_atapi_device); -/* PIO data transfer routine using the scatter gather table. */ -int ide_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount, int write) -{ - ide_hwif_t *hwif = drive->hwif; - const struct ide_tp_ops *tp_ops = hwif->tp_ops; - xfer_func_t *xf = write ? tp_ops->output_data : tp_ops->input_data; - struct scatterlist *sg = pc->sg; - char *buf; - int count, done = 0; - - while (bcount) { - count = min(sg->length - pc->b_count, bcount); - - if (PageHighMem(sg_page(sg))) { - unsigned long flags; - - local_irq_save(flags); - buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; - xf(drive, NULL, buf + pc->b_count, count); - kunmap_atomic(buf - sg->offset, KM_IRQ0); - local_irq_restore(flags); - } else { - buf = sg_virt(sg); - xf(drive, NULL, buf + pc->b_count, count); - } - - bcount -= count; - pc->b_count += count; - done += count; - - if (pc->b_count == sg->length) { - if (!--pc->sg_cnt) - break; - pc->sg = sg = sg_next(sg); - pc->b_count = 0; - } - } - - if (bcount) { - printk(KERN_ERR "%s: %d leftover bytes, %s\n", drive->name, - bcount, write ? "padding with zeros" - : "discarding data"); - ide_pad_transfer(drive, write, bcount); - } - - return done; -} -EXPORT_SYMBOL_GPL(ide_io_buffers); - void ide_init_pc(struct ide_atapi_pc *pc) { memset(pc, 0, sizeof(*pc)); @@ -324,12 +276,14 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) { struct ide_atapi_pc *pc = drive->pc; ide_hwif_t *hwif = drive->hwif; + struct ide_cmd *cmd = &hwif->cmd; struct request *rq = hwif->rq; const struct ide_tp_ops *tp_ops = hwif->tp_ops; xfer_func_t *xferfunc; - unsigned int timeout, temp; + unsigned int timeout, done; u16 bcount; u8 stat, ireason, dsc = 0; + u8 write = !!(pc->flags & PC_FLAG_WRITING); debug_log("Enter %s - interrupt handler\n", __func__); @@ -340,8 +294,13 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) stat = tp_ops->read_status(hwif); if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) { - if (hwif->dma_ops->dma_end(drive) || - (drive->media == ide_tape && (stat & ATA_ERR))) { + int rc; + + drive->waiting_for_dma = 0; + rc = hwif->dma_ops->dma_end(drive); + ide_dma_unmap_sg(drive, cmd); + + if (rc || (drive->media == ide_tape && (stat & ATA_ERR))) { if (drive->media == ide_floppy) printk(KERN_ERR "%s: DMA %s error\n", drive->name, rq_data_dir(pc->rq) @@ -357,7 +316,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) /* No more interrupts */ if ((stat & ATA_DRQ) == 0) { - int uptodate; + int uptodate, error; + unsigned int done; debug_log("Packet command completed, %d bytes transferred\n", pc->xferred); @@ -404,16 +364,24 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if (blk_special_request(rq)) { rq->errors = 0; - ide_complete_rq(drive, 0, blk_rq_bytes(rq)); + done = blk_rq_bytes(rq); + error = 0; } else { + if (blk_fs_request(rq) == 0 && uptodate <= 0) { if (rq->errors == 0) rq->errors = -EIO; } - ide_complete_rq(drive, uptodate ? 0 : -EIO, - ide_rq_bytes(rq)); + + if (drive->media == ide_tape) + done = ide_rq_bytes(rq); /* FIXME */ + else + done = blk_rq_bytes(rq); + + error = uptodate ? 0 : -EIO; } + ide_complete_rq(drive, error, done); return ide_stopped; } @@ -433,8 +401,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) return ide_do_reset(drive); } - if (((ireason & ATAPI_IO) == ATAPI_IO) == - !!(pc->flags & PC_FLAG_WRITING)) { + if (((ireason & ATAPI_IO) == ATAPI_IO) == write) { /* Hopefully, we will never get here */ printk(KERN_ERR "%s: We wanted to %s, but the device wants us " "to %s!\n", drive->name, @@ -443,45 +410,30 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) return ide_do_reset(drive); } - if (!(pc->flags & PC_FLAG_WRITING)) { - /* Reading - Check that we have enough space */ - temp = pc->xferred + bcount; - if (temp > pc->req_xfer) { - if (temp > pc->buf_size) { - printk(KERN_ERR "%s: The device wants to send " - "us more data than expected - " - "discarding data\n", - drive->name); - - ide_pad_transfer(drive, 0, bcount); - goto next_irq; - } - debug_log("The device wants to send us more data than " - "expected - allowing transfer\n"); - } - xferfunc = tp_ops->input_data; - } else - xferfunc = tp_ops->output_data; - - if ((drive->media == ide_floppy && !pc->buf) || - (drive->media == ide_tape && pc->bh)) { - int done = drive->pc_io_buffers(drive, pc, bcount, - !!(pc->flags & PC_FLAG_WRITING)); - - /* FIXME: don't do partial completions */ - if (drive->media == ide_floppy) - ide_complete_rq(drive, 0, - done ? done : ide_rq_bytes(rq)); - } else - xferfunc(drive, NULL, pc->cur_pos, bcount); + xferfunc = write ? tp_ops->output_data : tp_ops->input_data; + + if (drive->media == ide_floppy && pc->buf == NULL) { + done = min_t(unsigned int, bcount, cmd->nleft); + ide_pio_bytes(drive, cmd, write, done); + } else if (drive->media == ide_tape && pc->bh) { + done = drive->pc_io_buffers(drive, pc, bcount, write); + } else { + done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred); + xferfunc(drive, NULL, pc->cur_pos, done); + } /* Update the current position */ - pc->xferred += bcount; - pc->cur_pos += bcount; + pc->xferred += done; + pc->cur_pos += done; + + bcount -= done; + + if (bcount) + ide_pad_transfer(drive, write, bcount); + + debug_log("[cmd %x] transferred %d bytes, padded %d bytes\n", + rq->cmd[0], done, bcount); - debug_log("[cmd %x] transferred %d bytes on that intr.\n", - rq->cmd[0], bcount); -next_irq: /* And set the interrupt handler again */ ide_set_handler(drive, ide_pc_intr, timeout); return ide_started; @@ -611,6 +563,10 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive) : ide_pc_intr), timeout); + /* Send the actual packet */ + if ((drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) == 0) + hwif->tp_ops->output_data(drive, NULL, rq->cmd, cmd_len); + /* Begin DMA, if necessary */ if (dev_is_idecd(drive)) { if (drive->dma) @@ -622,10 +578,6 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive) } } - /* Send the actual packet */ - if ((drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) == 0) - hwif->tp_ops->output_data(drive, NULL, rq->cmd, cmd_len); - return ide_started; } @@ -633,7 +585,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd) { struct ide_atapi_pc *pc; ide_hwif_t *hwif = drive->hwif; - const struct ide_dma_ops *dma_ops = hwif->dma_ops; ide_expiry_t *expiry = NULL; struct request *rq = hwif->rq; unsigned int timeout; @@ -647,12 +598,8 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd) expiry = ide_cd_expiry; timeout = ATAPI_WAIT_PC; - if (drive->dma) { - if (ide_build_sglist(drive, cmd)) - drive->dma = !dma_ops->dma_setup(drive, cmd); - else - drive->dma = 0; - } + if (drive->dma) + drive->dma = !ide_dma_prepare(drive, cmd); } else { pc = drive->pc; @@ -670,13 +617,8 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd) ide_dma_off(drive); } - if ((pc->flags & PC_FLAG_DMA_OK) && - (drive->dev_flags & IDE_DFLAG_USING_DMA)) { - if (ide_build_sglist(drive, cmd)) - drive->dma = !dma_ops->dma_setup(drive, cmd); - else - drive->dma = 0; - } + if (pc->flags & PC_FLAG_DMA_OK) + drive->dma = !ide_dma_prepare(drive, cmd); if (!drive->dma) pc->flags &= ~PC_FLAG_DMA_OK; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 3f630e4080d4..35729a47f797 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -4,7 +4,7 @@ * Copyright (C) 1994-1996 Scott Snyder <snyder@fnald0.fnal.gov> * Copyright (C) 1996-1998 Erik Andersen <andersee@debian.org> * Copyright (C) 1998-2000 Jens Axboe <axboe@suse.de> - * Copyright (C) 2005, 2007 Bartlomiej Zolnierkiewicz + * Copyright (C) 2005, 2007-2009 Bartlomiej Zolnierkiewicz * * May be copied or modified under the terms of the GNU General Public * License. See linux/COPYING for more information. @@ -12,12 +12,9 @@ * See Documentation/cdrom/ide-cd for usage information. * * Suggestions are welcome. Patches that work are more welcome though. ;-) - * For those wishing to work on this driver, please be sure you download - * and comply with the latest Mt. Fuji (SFF8090 version 4) and ATAPI - * (SFF-8020i rev 2.6) standards. These documents can be obtained by - * anonymous ftp from: - * ftp://fission.dt.wdc.com/pub/standards/SFF_atapi/spec/SFF8020-r2.6/PS/8020r26.ps - * ftp://ftp.avc-pioneer.com/Mtfuji4/Spec/Fuji4r10.pdf + * + * Documentation: + * Mt. Fuji (SFF8090 version 4) and ATAPI (SFF-8020i rev 2.6) standards. * * For historical changelog please see: * Documentation/ide/ChangeLog.ide-cd.1994-2004 @@ -245,73 +242,34 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0); } -static void cdrom_end_request(ide_drive_t *drive, int uptodate) +static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { - struct request *rq = drive->hwif->rq; - int nsectors = rq->hard_cur_sectors; - - ide_debug_log(IDE_DBG_FUNC, "cmd: 0x%x, uptodate: 0x%x, nsectors: %d", - rq->cmd[0], uptodate, nsectors); - - if (blk_sense_request(rq) && uptodate) { - /* - * For REQ_TYPE_SENSE, "rq->buffer" points to the original - * failed request - */ - struct request *failed = (struct request *) rq->buffer; - struct cdrom_info *info = drive->driver_data; - void *sense = &info->sense_data; - - if (failed) { - if (failed->sense) { - sense = failed->sense; - failed->sense_len = rq->sense_len; - } - cdrom_analyze_sense_data(drive, failed, sense); - /* - * now end the failed request - */ - if (blk_fs_request(failed)) { - if (ide_end_rq(drive, failed, -EIO, - failed->hard_nr_sectors << 9)) - BUG(); - } else { - if (blk_end_request(failed, -EIO, - failed->data_len)) - BUG(); - } - } else - cdrom_analyze_sense_data(drive, NULL, sense); - } - - if (!rq->current_nr_sectors && blk_fs_request(rq)) - uptodate = 1; - /* make sure it's fully ended */ - if (blk_pc_request(rq)) - nsectors = (rq->data_len + 511) >> 9; - if (!nsectors) - nsectors = 1; - - ide_debug_log(IDE_DBG_FUNC, "uptodate: 0x%x, nsectors: %d", - uptodate, nsectors); - - if (blk_fs_request(rq) == 0 && uptodate <= 0 && rq->errors == 0) - rq->errors = -EIO; + /* + * For REQ_TYPE_SENSE, "rq->buffer" points to the original + * failed request + */ + struct request *failed = (struct request *)rq->buffer; + struct cdrom_info *info = drive->driver_data; + void *sense = &info->sense_data; - ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9); -} + if (failed) { + if (failed->sense) { + sense = failed->sense; + failed->sense_len = rq->sense_len; + } + cdrom_analyze_sense_data(drive, failed, sense); -static void ide_dump_status_no_sense(ide_drive_t *drive, const char *msg, u8 st) -{ - if (st & 0x80) - return; - ide_dump_status(drive, msg, st); + if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed))) + BUG(); + } else + cdrom_analyze_sense_data(drive, NULL, sense); } /* * Returns: * 0: if the request should be continued. - * 1: if the request was ended. + * 1: if the request will be going through error recovery. + * 2: if the request should be ended. */ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) { @@ -332,12 +290,6 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) err = ide_read_error(drive); sense_key = err >> 4; - if (rq == NULL) { - printk(KERN_ERR PFX "%s: missing rq in %s\n", - drive->name, __func__); - return 1; - } - ide_debug_log(IDE_DBG_RQ, "stat: 0x%x, good_stat: 0x%x, cmd[0]: 0x%x, " "rq->cmd_type: 0x%x, err: 0x%x", stat, good_stat, rq->cmd[0], rq->cmd_type, @@ -350,10 +302,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) * Just give up. */ rq->cmd_flags |= REQ_FAILED; - cdrom_end_request(drive, 0); - ide_error(drive, "request sense failure", stat); - return 1; - + return 2; } else if (blk_pc_request(rq) || rq->cmd_type == REQ_TYPE_ATA_PC) { /* All other functions, except for READ. */ @@ -456,21 +405,19 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) * No point in retrying after an illegal request or data * protect error. */ - ide_dump_status_no_sense(drive, "command error", stat); + ide_dump_status(drive, "command error", stat); do_end_request = 1; } else if (sense_key == MEDIUM_ERROR) { /* * No point in re-trying a zillion times on a bad * sector. If we got here the error is not correctable. */ - ide_dump_status_no_sense(drive, - "media error (bad sector)", - stat); + ide_dump_status(drive, "media error (bad sector)", + stat); do_end_request = 1; } else if (sense_key == BLANK_CHECK) { /* disk appears blank ?? */ - ide_dump_status_no_sense(drive, "media error (blank)", - stat); + ide_dump_status(drive, "media error (blank)", stat); do_end_request = 1; } else if ((err & ~ATA_ABORTED) != 0) { /* go to the default handler for other errors */ @@ -495,14 +442,12 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) */ if (stat & ATA_ERR) cdrom_queue_request_sense(drive, NULL, NULL); + return 1; } else { blk_dump_rq_flags(rq, PFX "bad rq"); - cdrom_end_request(drive, 0); + return 2; } - /* retry, or handle the next request */ - return 1; - end_request: if (stat & ATA_ERR) { struct request_queue *q = drive->queue; @@ -515,10 +460,9 @@ end_request: hwif->rq = NULL; cdrom_queue_request_sense(drive, rq->sense, rq); + return 1; } else - cdrom_end_request(drive, 0); - - return 1; + return 2; } /* @@ -562,101 +506,13 @@ static int ide_cd_check_ireason(ide_drive_t *drive, struct request *rq, if (rq->cmd_type == REQ_TYPE_ATA_PC) rq->cmd_flags |= REQ_FAILED; - cdrom_end_request(drive, 0); return -1; } -/* - * Assume that the drive will always provide data in multiples of at least - * SECTOR_SIZE, as it gets hairy to keep track of the transfers otherwise. - */ -static int ide_cd_check_transfer_size(ide_drive_t *drive, int len) +static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd) { - ide_debug_log(IDE_DBG_FUNC, "len: %d", len); - - if ((len % SECTOR_SIZE) == 0) - return 0; + struct request *rq = cmd->rq; - printk(KERN_ERR PFX "%s: %s: Bad transfer size %d\n", drive->name, - __func__, len); - - if (drive->atapi_flags & IDE_AFLAG_LIMIT_NFRAMES) - printk(KERN_ERR PFX "This drive is not supported by this " - "version of the driver\n"); - else { - printk(KERN_ERR PFX "Trying to limit transfer sizes\n"); - drive->atapi_flags |= IDE_AFLAG_LIMIT_NFRAMES; - } - - return 1; -} - -static ide_startstop_t ide_cd_prepare_rw_request(ide_drive_t *drive, - struct request *rq) -{ - ide_debug_log(IDE_DBG_RQ, "rq->cmd_flags: 0x%x", rq->cmd_flags); - - if (rq_data_dir(rq) == READ) { - unsigned short sectors_per_frame = - queue_hardsect_size(drive->queue) >> SECTOR_BITS; - int nskip = rq->sector & (sectors_per_frame - 1); - - /* - * If the requested sector doesn't start on a frame boundary, - * we must adjust the start of the transfer so that it does, - * and remember to skip the first few sectors. - * - * If the rq->current_nr_sectors field is larger than the size - * of the buffer, it will mean that we're to skip a number of - * sectors equal to the amount by which rq->current_nr_sectors - * is larger than the buffer size. - */ - if (nskip > 0) { - /* sanity check... */ - if (rq->current_nr_sectors != - bio_cur_sectors(rq->bio)) { - printk(KERN_ERR PFX "%s: %s: buffer botch (%u)\n", - drive->name, __func__, - rq->current_nr_sectors); - cdrom_end_request(drive, 0); - return ide_stopped; - } - rq->current_nr_sectors += nskip; - } - } - - /* set up the command */ - rq->timeout = ATAPI_WAIT_PC; - - return ide_started; -} - -/* - * Fix up a possibly partially-processed request so that we can start it over - * entirely, or even put it back on the request queue. - */ -static void ide_cd_restore_request(ide_drive_t *drive, struct request *rq) -{ - - ide_debug_log(IDE_DBG_FUNC, "enter"); - - if (rq->buffer != bio_data(rq->bio)) { - sector_t n = - (rq->buffer - (char *)bio_data(rq->bio)) / SECTOR_SIZE; - - rq->buffer = bio_data(rq->bio); - rq->nr_sectors += n; - rq->sector -= n; - } - rq->current_nr_sectors = bio_cur_sectors(rq->bio); - rq->hard_cur_sectors = rq->current_nr_sectors; - rq->hard_nr_sectors = rq->nr_sectors; - rq->hard_sector = rq->sector; - rq->q->prep_rq_fn(rq->q, rq); -} - -static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct request *rq) -{ ide_debug_log(IDE_DBG_FUNC, "rq->cmd[0]: 0x%x", rq->cmd[0]); /* @@ -664,11 +520,14 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct request *rq) * and some drives don't send them. Sigh. */ if (rq->cmd[0] == GPCMD_REQUEST_SENSE && - rq->data_len > 0 && rq->data_len <= 5) - while (rq->data_len > 0) { - *(u8 *)rq->data++ = 0; - --rq->data_len; + cmd->nleft > 0 && cmd->nleft <= 5) { + unsigned int ofs = cmd->nbytes - cmd->nleft; + + while (cmd->nleft > 0) { + *((u8 *)rq->data + ofs++) = 0; + cmd->nleft--; } + } } int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, @@ -748,24 +607,26 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, return (flags & REQ_FAILED) ? -EIO : 0; } -/* - * Called from blk_end_request_callback() after the data of the request is - * completed and before the request itself is completed. By returning value '1', - * blk_end_request_callback() returns immediately without completing it. - */ -static int cdrom_newpc_intr_dummy_cb(struct request *rq) +static void ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd) { - return 1; + unsigned int nr_bytes = cmd->nbytes - cmd->nleft; + + if (cmd->tf_flags & IDE_TFLAG_WRITE) + nr_bytes -= cmd->last_xfer_len; + + if (nr_bytes > 0) + ide_complete_rq(drive, 0, nr_bytes); } static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; + struct ide_cmd *cmd = &hwif->cmd; struct request *rq = hwif->rq; - xfer_func_t *xferfunc; ide_expiry_t *expiry = NULL; int dma_error = 0, dma, stat, thislen, uptodate = 0; - int write = (rq_data_dir(rq) == WRITE) ? 1 : 0; + int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc, nsectors; + int sense = blk_sense_request(rq); unsigned int timeout; u16 len; u8 ireason; @@ -777,7 +638,9 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) dma = drive->dma; if (dma) { drive->dma = 0; + drive->waiting_for_dma = 0; dma_error = hwif->dma_ops->dma_end(drive); + ide_dma_unmap_sg(drive, cmd); if (dma_error) { printk(KERN_ERR PFX "%s: DMA %s error\n", drive->name, write ? "write" : "read"); @@ -785,27 +648,24 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) } } - if (cdrom_decode_status(drive, 0, &stat)) + rc = cdrom_decode_status(drive, 0, &stat); + if (rc) { + if (rc == 2) + goto out_end; return ide_stopped; + } /* using dma, transfer is complete now */ if (dma) { if (dma_error) return ide_error(drive, "dma error", stat); - if (blk_fs_request(rq)) { - ide_complete_rq(drive, 0, rq->nr_sectors - ? (rq->nr_sectors << 9) : ide_rq_bytes(rq)); - return ide_stopped; - } else if (rq->cmd_type == REQ_TYPE_ATA_PC && !rq->bio) { - ide_complete_rq(drive, 0, 512); - return ide_stopped; - } - goto end_request; + uptodate = 1; + goto out_end; } ide_read_bcount_and_ireason(drive, &len, &ireason); - thislen = blk_fs_request(rq) ? len : rq->data_len; + thislen = blk_fs_request(rq) ? len : cmd->nleft; if (thislen > len) thislen = len; @@ -820,60 +680,30 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) * Otherwise, complete the command normally. */ uptodate = 1; - if (rq->current_nr_sectors > 0) { + if (cmd->nleft > 0) { printk(KERN_ERR PFX "%s: %s: data underrun " - "(%d blocks)\n", - drive->name, __func__, - rq->current_nr_sectors); + "(%u bytes)\n", drive->name, __func__, + cmd->nleft); if (!write) rq->cmd_flags |= REQ_FAILED; uptodate = 0; } - cdrom_end_request(drive, uptodate); - return ide_stopped; } else if (!blk_pc_request(rq)) { - ide_cd_request_sense_fixup(drive, rq); + ide_cd_request_sense_fixup(drive, cmd); /* complain if we still have data left to transfer */ - uptodate = rq->data_len ? 0 : 1; + uptodate = cmd->nleft ? 0 : 1; + if (uptodate == 0) + rq->cmd_flags |= REQ_FAILED; } - goto end_request; + goto out_end; } /* check which way to transfer data */ - if (ide_cd_check_ireason(drive, rq, len, ireason, write)) - return ide_stopped; + rc = ide_cd_check_ireason(drive, rq, len, ireason, write); + if (rc) + goto out_end; - if (blk_fs_request(rq)) { - if (write == 0) { - int nskip; - - if (ide_cd_check_transfer_size(drive, len)) { - cdrom_end_request(drive, 0); - return ide_stopped; - } - - /* - * First, figure out if we need to bit-bucket - * any of the leading sectors. - */ - nskip = min_t(int, rq->current_nr_sectors - - bio_cur_sectors(rq->bio), - thislen >> 9); - if (nskip > 0) { - ide_pad_transfer(drive, write, nskip << 9); - rq->current_nr_sectors -= nskip; - thislen -= (nskip << 9); - } - } - } - - if (ireason == 0) { - write = 1; - xferfunc = hwif->tp_ops->output_data; - } else { - write = 0; - xferfunc = hwif->tp_ops->input_data; - } + cmd->last_xfer_len = 0; ide_debug_log(IDE_DBG_PC, "data transfer, rq->cmd_type: 0x%x, " "ireason: 0x%x", @@ -881,75 +711,31 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) /* transfer data */ while (thislen > 0) { - u8 *ptr = blk_fs_request(rq) ? NULL : rq->data; - int blen = rq->data_len; - - /* bio backed? */ - if (rq->bio) { - if (blk_fs_request(rq)) { - ptr = rq->buffer; - blen = rq->current_nr_sectors << 9; - } else { - ptr = bio_data(rq->bio); - blen = bio_iovec(rq->bio)->bv_len; - } - } + int blen = min_t(int, thislen, cmd->nleft); - if (!ptr) { - if (blk_fs_request(rq) && !write) - /* - * If the buffers are full, pipe the rest into - * oblivion. - */ - ide_pad_transfer(drive, 0, thislen); - else { - printk(KERN_ERR PFX "%s: confused, missing data\n", - drive->name); - blk_dump_rq_flags(rq, rq_data_dir(rq) - ? "cdrom_newpc_intr, write" - : "cdrom_newpc_intr, read"); - } + if (cmd->nleft == 0) break; - } - - if (blen > thislen) - blen = thislen; - xferfunc(drive, NULL, ptr, blen); + ide_pio_bytes(drive, cmd, write, blen); + cmd->last_xfer_len += blen; thislen -= blen; len -= blen; - if (blk_fs_request(rq)) { - rq->buffer += blen; - rq->nr_sectors -= (blen >> 9); - rq->current_nr_sectors -= (blen >> 9); - rq->sector += (blen >> 9); - - if (rq->current_nr_sectors == 0 && rq->nr_sectors) - cdrom_end_request(drive, 1); - } else { - rq->data_len -= blen; - - /* - * The request can't be completed until DRQ is cleared. - * So complete the data, but don't complete the request - * using the dummy function for the callback feature - * of blk_end_request_callback(). - */ - if (rq->bio) - blk_end_request_callback(rq, 0, blen, - cdrom_newpc_intr_dummy_cb); - else - rq->data += blen; - } - if (!write && blk_sense_request(rq)) + if (sense && write == 0) rq->sense_len += blen; } /* pad, if necessary */ - if (!blk_fs_request(rq) && len > 0) - ide_pad_transfer(drive, write, len); + if (len > 0) { + if (blk_fs_request(rq) == 0 || write == 0) + ide_pad_transfer(drive, write, len); + else { + printk(KERN_ERR PFX "%s: confused, missing data\n", + drive->name); + blk_dump_rq_flags(rq, "cdrom_newpc_intr"); + } + } if (blk_pc_request(rq)) { timeout = rq->timeout; @@ -963,21 +749,50 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) ide_set_handler(drive, cdrom_newpc_intr, timeout); return ide_started; -end_request: - if (blk_pc_request(rq)) { +out_end: + if (blk_pc_request(rq) && rc == 0) { unsigned int dlen = rq->data_len; - if (dma) - rq->data_len = 0; + rq->data_len = 0; if (blk_end_request(rq, 0, dlen)) BUG(); hwif->rq = NULL; } else { - if (!uptodate) - rq->cmd_flags |= REQ_FAILED; - cdrom_end_request(drive, uptodate); + if (sense && uptodate) + ide_cd_complete_failed_rq(drive, rq); + + if (blk_fs_request(rq)) { + if (cmd->nleft == 0) + uptodate = 1; + } else { + if (uptodate <= 0 && rq->errors == 0) + rq->errors = -EIO; + } + + if (uptodate == 0) + ide_cd_error_cmd(drive, cmd); + + /* make sure it's fully ended */ + if (blk_pc_request(rq)) + nsectors = (rq->data_len + 511) >> 9; + else + nsectors = rq->hard_nr_sectors; + + if (nsectors == 0) + nsectors = 1; + + if (blk_fs_request(rq) == 0) { + rq->data_len -= (cmd->nbytes - cmd->nleft); + if (uptodate == 0 && (cmd->tf_flags & IDE_TFLAG_WRITE)) + rq->data_len += cmd->last_xfer_len; + } + + ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9); + + if (sense && rc == 2) + ide_error(drive, "request sense failure", stat); } return ide_stopped; } @@ -985,42 +800,40 @@ end_request: static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) { struct cdrom_info *cd = drive->driver_data; + struct request_queue *q = drive->queue; int write = rq_data_dir(rq) == WRITE; unsigned short sectors_per_frame = - queue_hardsect_size(drive->queue) >> SECTOR_BITS; + queue_hardsect_size(q) >> SECTOR_BITS; - ide_debug_log(IDE_DBG_RQ, "rq->cmd[0]: 0x%x, write: 0x%x, " + ide_debug_log(IDE_DBG_RQ, "rq->cmd[0]: 0x%x, rq->cmd_flags: 0x%x, " "secs_per_frame: %u", - rq->cmd[0], write, sectors_per_frame); + rq->cmd[0], rq->cmd_flags, sectors_per_frame); if (write) { /* disk has become write protected */ - if (get_disk_ro(cd->disk)) { - cdrom_end_request(drive, 0); + if (get_disk_ro(cd->disk)) return ide_stopped; - } } else { /* * We may be retrying this request after an error. Fix up any * weirdness which might be present in the request packet. */ - ide_cd_restore_request(drive, rq); + q->prep_rq_fn(q, rq); } - /* use DMA, if possible / writes *must* be hardware frame aligned */ + /* fs requests *must* be hardware frame aligned */ if ((rq->nr_sectors & (sectors_per_frame - 1)) || - (rq->sector & (sectors_per_frame - 1))) { - if (write) { - cdrom_end_request(drive, 0); - return ide_stopped; - } - drive->dma = 0; - } else - drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); + (rq->sector & (sectors_per_frame - 1))) + return ide_stopped; + + /* use DMA, if possible */ + drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); if (write) cd->devinfo.media_written = 1; + rq->timeout = ATAPI_WAIT_PC; + return ide_started; } @@ -1068,6 +881,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, sector_t block) { struct ide_cmd cmd; + int uptodate = 0, nsectors; ide_debug_log(IDE_DBG_RQ, "cmd: 0x%x, block: %llu", rq->cmd[0], (unsigned long long)block); @@ -1077,10 +891,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, if (blk_fs_request(rq)) { if (cdrom_start_rw(drive, rq) == ide_stopped) - return ide_stopped; - - if (ide_cd_prepare_rw_request(drive, rq) == ide_stopped) - return ide_stopped; + goto out_end; } else if (blk_sense_request(rq) || blk_pc_request(rq) || rq->cmd_type == REQ_TYPE_ATA_PC) { if (!rq->timeout) @@ -1089,12 +900,13 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, cdrom_do_block_pc(drive, rq); } else if (blk_special_request(rq)) { /* right now this can only be a reset... */ - cdrom_end_request(drive, 1); - return ide_stopped; + uptodate = 1; + goto out_end; } else { blk_dump_rq_flags(rq, DRV_NAME " bad flags"); - cdrom_end_request(drive, 0); - return ide_stopped; + if (rq->errors == 0) + rq->errors = -EIO; + goto out_end; } memset(&cmd, 0, sizeof(cmd)); @@ -1104,7 +916,22 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, cmd.rq = rq; + if (blk_fs_request(rq) || rq->data_len) { + ide_init_sg_cmd(&cmd, blk_fs_request(rq) ? (rq->nr_sectors << 9) + : rq->data_len); + ide_map_sg(drive, &cmd); + } + return ide_issue_pc(drive, &cmd); +out_end: + nsectors = rq->hard_nr_sectors; + + if (nsectors == 0) + nsectors = 1; + + ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9); + + return ide_stopped; } /* @@ -1696,9 +1523,6 @@ static const struct ide_proc_devset *ide_cd_proc_devsets(ide_drive_t *drive) #endif static const struct cd_list_entry ide_cd_quirks_list[] = { - /* Limit transfer size per interrupt. */ - { "SAMSUNG CD-ROM SCR-2430", NULL, IDE_AFLAG_LIMIT_NFRAMES }, - { "SAMSUNG CD-ROM SCR-2432", NULL, IDE_AFLAG_LIMIT_NFRAMES }, /* SCR-3231 doesn't support the SET_CD_SPEED command. */ { "SAMSUNG CD-ROM SCR-3231", NULL, IDE_AFLAG_NO_SPEED_SELECT }, /* Old NEC260 (not R) was released before ATAPI 1.2 spec. */ @@ -1759,18 +1583,18 @@ static int ide_cdrom_setup(ide_drive_t *drive) { struct cdrom_info *cd = drive->driver_data; struct cdrom_device_info *cdi = &cd->devinfo; + struct request_queue *q = drive->queue; u16 *id = drive->id; char *fw_rev = (char *)&id[ATA_ID_FW_REV]; int nslots; ide_debug_log(IDE_DBG_PROBE, "enter"); - blk_queue_prep_rq(drive->queue, ide_cdrom_prep_fn); - blk_queue_dma_alignment(drive->queue, 31); - blk_queue_update_dma_pad(drive->queue, 15); - drive->queue->unplug_delay = (1 * HZ) / 1000; - if (!drive->queue->unplug_delay) - drive->queue->unplug_delay = 1; + blk_queue_prep_rq(q, ide_cdrom_prep_fn); + blk_queue_dma_alignment(q, 31); + blk_queue_update_dma_pad(q, 15); + + q->unplug_delay = max((1 * HZ) / 1000, 1); drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED; drive->atapi_flags = IDE_AFLAG_NO_EJECT | ide_cd_flags(id); @@ -1788,8 +1612,7 @@ static int ide_cdrom_setup(ide_drive_t *drive) nslots = ide_cdrom_probe_capabilities(drive); - /* set correct block size */ - blk_queue_hardsect_size(drive->queue, CD_FRAMESIZE); + blk_queue_hardsect_size(q, CD_FRAMESIZE); if (ide_cdrom_register(drive, nslots)) { printk(KERN_ERR PFX "%s: %s failed to register device with the" @@ -1968,9 +1791,6 @@ static struct block_device_operations idecd_ops = { }; /* module options */ -static char *ignore; -module_param(ignore, charp, 0400); - static unsigned long debug_mask; module_param(debug_mask, ulong, 0644); @@ -1991,15 +1811,6 @@ static int ide_cd_probe(ide_drive_t *drive) if (drive->media != ide_cdrom && drive->media != ide_optical) goto failed; - /* skip drives that we were told to ignore */ - if (ignore != NULL) { - if (strstr(ignore, drive->name)) { - printk(KERN_INFO PFX "ignoring drive %s\n", - drive->name); - goto failed; - } - } - drive->debug_mask = debug_mask; drive->irq_handler = cdrom_newpc_intr; diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index ca934c8a1289..c998cf8e971a 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -227,7 +227,7 @@ static u64 idedisk_read_native_max_address(ide_drive_t *drive, int lba48) ide_no_data_taskfile(drive, &cmd); /* if OK, compute maximum address value */ - if ((tf->status & 0x01) == 0) + if (!(tf->status & ATA_ERR)) addr = ide_get_lba_addr(tf, lba48) + 1; return addr; @@ -267,7 +267,7 @@ static u64 idedisk_set_max_address(ide_drive_t *drive, u64 addr_req, int lba48) ide_no_data_taskfile(drive, &cmd); /* if OK, compute maximum address value */ - if ((tf->status & 0x01) == 0) + if (!(tf->status & ATA_ERR)) addr_set = ide_get_lba_addr(tf, lba48) + 1; return addr_set; diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c index 75a9ea2e4c82..16fc46edc32d 100644 --- a/drivers/ide/ide-dma-sff.c +++ b/drivers/ide/ide-dma-sff.c @@ -38,10 +38,9 @@ int config_drive_for_dma(ide_drive_t *drive) * Enable DMA on any drive that has mode2 DMA * (multi or single) enabled */ - if (id[ATA_ID_FIELD_VALID] & 2) /* regular DMA */ - if ((id[ATA_ID_MWDMA_MODES] & 0x404) == 0x404 || - (id[ATA_ID_SWDMA_MODES] & 0x404) == 0x404) - return 1; + if ((id[ATA_ID_MWDMA_MODES] & 0x404) == 0x404 || + (id[ATA_ID_SWDMA_MODES] & 0x404) == 0x404) + return 1; /* Consult the list of known "good" drives */ if (ide_dma_good_drive(drive)) @@ -166,8 +165,6 @@ use_pio_instead: printk(KERN_ERR "%s: %s\n", drive->name, count ? "DMA table too small" : "empty DMA table?"); - ide_destroy_dmatable(drive); - return 0; /* revert to PIO for this request */ } EXPORT_SYMBOL_GPL(ide_build_dmatable); @@ -218,7 +215,6 @@ int ide_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) /* clear INTR & ERROR flags */ ide_dma_sff_write_status(hwif, dma_stat | ATA_DMA_ERR | ATA_DMA_INTR); - drive->waiting_for_dma = 1; return 0; } EXPORT_SYMBOL_GPL(ide_dma_setup); @@ -292,8 +288,6 @@ int ide_dma_end(ide_drive_t *drive) ide_hwif_t *hwif = drive->hwif; u8 dma_stat = 0, dma_cmd = 0, mask; - drive->waiting_for_dma = 0; - /* stop DMA */ if (hwif->host_flags & IDE_HFLAG_MMIO) { dma_cmd = readb((void __iomem *)(hwif->dma_base + ATA_DMA_CMD)); @@ -310,8 +304,6 @@ int ide_dma_end(ide_drive_t *drive) /* clear INTR & ERROR bits */ ide_dma_sff_write_status(hwif, dma_stat | ATA_DMA_ERR | ATA_DMA_INTR); - /* purge DMA mappings */ - ide_destroy_dmatable(drive); wmb(); /* verify good DMA status */ @@ -338,9 +330,8 @@ const struct ide_dma_ops sff_dma_ops = { .dma_start = ide_dma_start, .dma_end = ide_dma_end, .dma_test_irq = ide_dma_test_irq, - .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_lost_irq = ide_dma_lost_irq, + .dma_timer_expiry = ide_dma_sff_timer_expiry, .dma_sff_read_status = ide_dma_sff_read_status, }; EXPORT_SYMBOL_GPL(sff_dma_ops); diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 3dbf80c15491..a0b8cab1d9a6 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -89,15 +89,16 @@ static const struct drive_list_entry drive_blacklist[] = { ide_startstop_t ide_dma_intr(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; + struct ide_cmd *cmd = &hwif->cmd; u8 stat = 0, dma_stat = 0; + drive->waiting_for_dma = 0; dma_stat = hwif->dma_ops->dma_end(drive); + ide_dma_unmap_sg(drive, cmd); stat = hwif->tp_ops->read_status(hwif); if (OK_STAT(stat, DRIVE_READY, drive->bad_wstat | ATA_DRQ)) { if (!dma_stat) { - struct ide_cmd *cmd = &hwif->cmd; - if ((cmd->tf_flags & IDE_TFLAG_FS) == 0) ide_finish_cmd(drive, cmd, stat); else @@ -117,8 +118,8 @@ int ide_dma_good_drive(ide_drive_t *drive) } /** - * ide_build_sglist - map IDE scatter gather for DMA I/O - * @drive: the drive to build the DMA table for + * ide_dma_map_sg - map IDE scatter gather for DMA I/O + * @drive: the drive to map the DMA table for * @cmd: command * * Perform the DMA mapping magic necessary to access the source or @@ -127,23 +128,19 @@ int ide_dma_good_drive(ide_drive_t *drive) * operate in a portable fashion. */ -int ide_build_sglist(ide_drive_t *drive, struct ide_cmd *cmd) +static int ide_dma_map_sg(ide_drive_t *drive, struct ide_cmd *cmd) { ide_hwif_t *hwif = drive->hwif; struct scatterlist *sg = hwif->sg_table; int i; - ide_map_sg(drive, cmd); - if (cmd->tf_flags & IDE_TFLAG_WRITE) cmd->sg_dma_direction = DMA_TO_DEVICE; else cmd->sg_dma_direction = DMA_FROM_DEVICE; i = dma_map_sg(hwif->dev, sg, cmd->sg_nents, cmd->sg_dma_direction); - if (i == 0) - ide_map_sg(drive, cmd); - else { + if (i) { cmd->orig_sg_nents = cmd->sg_nents; cmd->sg_nents = i; } @@ -152,7 +149,7 @@ int ide_build_sglist(ide_drive_t *drive, struct ide_cmd *cmd) } /** - * ide_destroy_dmatable - clean up DMA mapping + * ide_dma_unmap_sg - clean up DMA mapping * @drive: The drive to unmap * * Teardown mappings after DMA has completed. This must be called @@ -162,15 +159,14 @@ int ide_build_sglist(ide_drive_t *drive, struct ide_cmd *cmd) * time. */ -void ide_destroy_dmatable(ide_drive_t *drive) +void ide_dma_unmap_sg(ide_drive_t *drive, struct ide_cmd *cmd) { ide_hwif_t *hwif = drive->hwif; - struct ide_cmd *cmd = &hwif->cmd; dma_unmap_sg(hwif->dev, hwif->sg_table, cmd->orig_sg_nents, cmd->sg_dma_direction); } -EXPORT_SYMBOL_GPL(ide_destroy_dmatable); +EXPORT_SYMBOL_GPL(ide_dma_unmap_sg); /** * ide_dma_off_quietly - Generic DMA kill @@ -249,12 +245,11 @@ static unsigned int ide_get_mode_mask(ide_drive_t *drive, u8 base, u8 req_mode) case XFER_UDMA_0: if ((id[ATA_ID_FIELD_VALID] & 4) == 0) break; - + mask = id[ATA_ID_UDMA_MODES]; if (port_ops && port_ops->udma_filter) - mask = port_ops->udma_filter(drive); + mask &= port_ops->udma_filter(drive); else - mask = hwif->ultra_mask; - mask &= id[ATA_ID_UDMA_MODES]; + mask &= hwif->ultra_mask; /* * avoid false cable warning from eighty_ninty_three() @@ -265,18 +260,23 @@ static unsigned int ide_get_mode_mask(ide_drive_t *drive, u8 base, u8 req_mode) } break; case XFER_MW_DMA_0: - if ((id[ATA_ID_FIELD_VALID] & 2) == 0) - break; + mask = id[ATA_ID_MWDMA_MODES]; + + /* Also look for the CF specific MWDMA modes... */ + if (ata_id_is_cfa(id) && (id[ATA_ID_CFA_MODES] & 0x38)) { + u8 mode = ((id[ATA_ID_CFA_MODES] & 0x38) >> 3) - 1; + + mask |= ((2 << mode) - 1) << 3; + } + if (port_ops && port_ops->mdma_filter) - mask = port_ops->mdma_filter(drive); + mask &= port_ops->mdma_filter(drive); else - mask = hwif->mwdma_mask; - mask &= id[ATA_ID_MWDMA_MODES]; + mask &= hwif->mwdma_mask; break; case XFER_SW_DMA_0: - if (id[ATA_ID_FIELD_VALID] & 2) { - mask = id[ATA_ID_SWDMA_MODES] & hwif->swdma_mask; - } else if (id[ATA_ID_OLD_DMA_MODES] >> 8) { + mask = id[ATA_ID_SWDMA_MODES]; + if (!(mask & ATA_SWDMA2) && (id[ATA_ID_OLD_DMA_MODES] >> 8)) { u8 mode = id[ATA_ID_OLD_DMA_MODES] >> 8; /* @@ -284,8 +284,9 @@ static unsigned int ide_get_mode_mask(ide_drive_t *drive, u8 base, u8 req_mode) * (the maximum allowed mode is XFER_SW_DMA_2) */ if (mode <= 2) - mask = ((2 << mode) - 1) & hwif->swdma_mask; + mask = (2 << mode) - 1; } + mask &= hwif->swdma_mask; break; default: BUG(); @@ -402,11 +403,10 @@ int ide_id_dma_bug(ide_drive_t *drive) if ((id[ATA_ID_UDMA_MODES] >> 8) && (id[ATA_ID_MWDMA_MODES] >> 8)) goto err_out; - } else if (id[ATA_ID_FIELD_VALID] & 2) { - if ((id[ATA_ID_MWDMA_MODES] >> 8) && - (id[ATA_ID_SWDMA_MODES] >> 8)) - goto err_out; - } + } else if ((id[ATA_ID_MWDMA_MODES] >> 8) && + (id[ATA_ID_SWDMA_MODES] >> 8)) + goto err_out; + return 0; err_out: printk(KERN_ERR "%s: bad DMA info in identify block\n", drive->name); @@ -460,21 +460,6 @@ void ide_dma_lost_irq(ide_drive_t *drive) } EXPORT_SYMBOL_GPL(ide_dma_lost_irq); -void ide_dma_timeout(ide_drive_t *drive) -{ - ide_hwif_t *hwif = drive->hwif; - - printk(KERN_ERR "%s: timeout waiting for DMA\n", drive->name); - - if (hwif->dma_ops->dma_test_irq(drive)) - return; - - ide_dump_status(drive, "DMA timeout", hwif->tp_ops->read_status(hwif)); - - hwif->dma_ops->dma_end(drive); -} -EXPORT_SYMBOL_GPL(ide_dma_timeout); - /* * un-busy the port etc, and clear any pending DMA status. we want to * retry the current request in pio mode instead of risking tossing it @@ -483,6 +468,8 @@ EXPORT_SYMBOL_GPL(ide_dma_timeout); ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) { ide_hwif_t *hwif = drive->hwif; + const struct ide_dma_ops *dma_ops = hwif->dma_ops; + struct ide_cmd *cmd = &hwif->cmd; struct request *rq; ide_startstop_t ret = ide_stopped; @@ -492,12 +479,23 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) if (error < 0) { printk(KERN_WARNING "%s: DMA timeout error\n", drive->name); - (void)hwif->dma_ops->dma_end(drive); + drive->waiting_for_dma = 0; + (void)dma_ops->dma_end(drive); + ide_dma_unmap_sg(drive, cmd); ret = ide_error(drive, "dma timeout error", hwif->tp_ops->read_status(hwif)); } else { printk(KERN_WARNING "%s: DMA timeout retry\n", drive->name); - hwif->dma_ops->dma_timeout(drive); + if (dma_ops->dma_clear) + dma_ops->dma_clear(drive); + printk(KERN_ERR "%s: timeout waiting for DMA\n", drive->name); + if (dma_ops->dma_test_irq(drive) == 0) { + ide_dump_status(drive, "DMA timeout", + hwif->tp_ops->read_status(hwif)); + drive->waiting_for_dma = 0; + (void)dma_ops->dma_end(drive); + ide_dma_unmap_sg(drive, cmd); + } } /* @@ -567,3 +565,25 @@ int ide_allocate_dma_engine(ide_hwif_t *hwif) return 0; } EXPORT_SYMBOL_GPL(ide_allocate_dma_engine); + +int ide_dma_prepare(ide_drive_t *drive, struct ide_cmd *cmd) +{ + const struct ide_dma_ops *dma_ops = drive->hwif->dma_ops; + + if ((drive->dev_flags & IDE_DFLAG_USING_DMA) == 0 || + (dma_ops->dma_check && dma_ops->dma_check(drive, cmd))) + goto out; + ide_map_sg(drive, cmd); + if (ide_dma_map_sg(drive, cmd) == 0) + goto out_map; + if (dma_ops->dma_setup(drive, cmd)) + goto out_dma_unmap; + drive->waiting_for_dma = 1; + return 0; +out_dma_unmap: + ide_dma_unmap_sg(drive, cmd); +out_map: + ide_map_sg(drive, cmd); +out: + return 1; +} diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index 11664976eea3..5d5fb961b5ce 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -165,11 +165,12 @@ static ide_startstop_t do_reset1(ide_drive_t *, int); static ide_startstop_t atapi_reset_pollfunc(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; + const struct ide_tp_ops *tp_ops = hwif->tp_ops; u8 stat; - SELECT_DRIVE(drive); + tp_ops->dev_select(drive); udelay(10); - stat = hwif->tp_ops->read_status(hwif); + stat = tp_ops->read_status(hwif); if (OK_STAT(stat, 0, ATA_BUSY)) printk(KERN_INFO "%s: ATAPI reset complete\n", drive->name); @@ -348,7 +349,7 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi) /* For an ATAPI device, first try an ATAPI SRST. */ if (drive->media != ide_disk && !do_not_try_atapi) { pre_reset(drive); - SELECT_DRIVE(drive); + tp_ops->dev_select(drive); udelay(20); tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); ndelay(400); @@ -401,15 +402,14 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi) * immediate interrupt due to the edge transition it produces. * This single interrupt gives us a "fast poll" for drives that * recover from reset very quickly, saving us the first 50ms wait time. - * - * TODO: add ->softreset method and stop abusing ->set_irq */ /* set SRST and nIEN */ - tp_ops->set_irq(hwif, 4); + tp_ops->write_devctl(hwif, ATA_SRST | ATA_NIEN | ATA_DEVCTL_OBS); /* more than enough time */ udelay(10); /* clear SRST, leave nIEN (unless device is on the quirk list) */ - tp_ops->set_irq(hwif, drive->quirk_list == 2); + tp_ops->write_devctl(hwif, (drive->quirk_list == 2 ? 0 : ATA_NIEN) | + ATA_DEVCTL_OBS); /* more than enough time */ udelay(10); hwif->poll_timeout = jiffies + WAIT_WORSTCASE; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 7ae662334835..2b4868d95f8b 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -61,16 +61,6 @@ */ #define IDEFLOPPY_PC_DELAY (HZ/20) /* default delay for ZIP 100 (50ms) */ -static void idefloppy_update_buffers(ide_drive_t *drive, - struct ide_atapi_pc *pc) -{ - struct request *rq = pc->rq; - struct bio *bio = rq->bio; - - while ((bio = rq->bio) != NULL) - ide_complete_rq(drive, 0, ide_rq_bytes(rq)); -} - static int ide_floppy_callback(ide_drive_t *drive, int dsc) { struct ide_disk_obj *floppy = drive->driver_data; @@ -213,7 +203,6 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive, memcpy(rq->cmd, pc->c, 12); pc->rq = rq; - pc->b_count = 0; if (rq->cmd_flags & REQ_RW) pc->flags |= PC_FLAG_WRITING; pc->buf = NULL; @@ -227,7 +216,6 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy, ide_init_pc(pc); memcpy(pc->c, rq->cmd, sizeof(pc->c)); pc->rq = rq; - pc->b_count = 0; if (rq->data_len && rq_data_dir(rq) == WRITE) pc->flags |= PC_FLAG_WRITING; pc->buf = rq->data; @@ -244,10 +232,11 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, struct request *rq, sector_t block) { struct ide_disk_obj *floppy = drive->driver_data; - ide_hwif_t *hwif = drive->hwif; struct ide_cmd cmd; struct ide_atapi_pc *pc; + ide_debug_log(IDE_DBG_FUNC, "enter, cmd: 0x%x\n", rq->cmd[0]); + if (drive->debug_mask & IDE_DBG_RQ) blk_dump_rq_flags(rq, (rq->rq_disk ? rq->rq_disk->disk_name @@ -294,13 +283,10 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, cmd.rq = rq; if (blk_fs_request(rq) || pc->req_xfer) { - ide_init_sg_cmd(&cmd, rq->nr_sectors << 9); + ide_init_sg_cmd(&cmd, pc->req_xfer); ide_map_sg(drive, &cmd); } - pc->sg = hwif->sg_table; - pc->sg_cnt = cmd.sg_nents; - pc->rq = rq; return ide_floppy_issue_pc(drive, &cmd, pc); @@ -385,9 +371,11 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) struct gendisk *disk = floppy->disk; struct ide_atapi_pc pc; u8 *cap_desc; - u8 header_len, desc_cnt; + u8 pc_buf[256], header_len, desc_cnt; int i, rc = 1, blocks, length; + ide_debug_log(IDE_DBG_FUNC, "enter"); + drive->bios_cyl = 0; drive->bios_head = drive->bios_sect = 0; floppy->blocks = 0; @@ -395,6 +383,9 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) drive->capacity64 = 0; ide_floppy_create_read_capacity_cmd(&pc); + pc.buf = &pc_buf[0]; + pc.buf_size = sizeof(pc_buf); + if (ide_queue_pc_tail(drive, disk, &pc)) { printk(KERN_ERR PFX "Can't get floppy parameters\n"); return 1; @@ -485,8 +476,6 @@ static void ide_floppy_setup(ide_drive_t *drive) u16 *id = drive->id; drive->pc_callback = ide_floppy_callback; - drive->pc_update_buffers = idefloppy_update_buffers; - drive->pc_io_buffers = ide_io_buffers; /* * We used to check revisions here. At this point however I'm giving up. diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c index 8f8be8546038..cd8a42027ede 100644 --- a/drivers/ide/ide-floppy_ioctl.c +++ b/drivers/ide/ide-floppy_ioctl.c @@ -36,9 +36,9 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg) { struct ide_disk_obj *floppy = drive->driver_data; - u8 header_len, desc_cnt; int i, blocks, length, u_array_size, u_index; int __user *argp; + u8 pc_buf[256], header_len, desc_cnt; if (get_user(u_array_size, arg)) return -EFAULT; @@ -47,6 +47,9 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, return -EINVAL; ide_floppy_create_read_capacity_cmd(pc); + pc->buf = &pc_buf[0]; + pc->buf_size = sizeof(pc_buf); + if (ide_queue_pc_tail(drive, floppy->disk, pc)) { printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n"); return -EIO; diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c index 9d03e8211536..7812ca0be13b 100644 --- a/drivers/ide/ide-generic.c +++ b/drivers/ide/ide-generic.c @@ -1,27 +1,22 @@ /* * generic/default IDE host driver * - * Copyright (C) 2004, 2008 Bartlomiej Zolnierkiewicz + * Copyright (C) 2004, 2008-2009 Bartlomiej Zolnierkiewicz * This code was split off from ide.c. See it for original copyrights. * * May be copied or modified under the terms of the GNU General Public License. */ -/* - * For special cases new interfaces may be added using sysfs, i.e. - * - * echo -n "0x168:0x36e:10" > /sys/class/ide_generic/add - * - * will add an interface using I/O ports 0x168-0x16f/0x36e and IRQ 10. - */ - #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/ide.h> #include <linux/pci_ids.h> -/* FIXME: convert m32r to use ide_platform host driver */ +/* FIXME: convert arm and m32r to use ide_platform host driver */ +#ifdef CONFIG_ARM +#include <asm/irq.h> +#endif #ifdef CONFIG_M32R #include <asm/m32r.h> #endif @@ -36,62 +31,11 @@ static const struct ide_port_info ide_generic_port_info = { .host_flags = IDE_HFLAG_NO_DMA, }; -static ssize_t store_add(struct class *cls, const char *buf, size_t n) -{ - unsigned int base, ctl; - int irq, rc; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; - - if (sscanf(buf, "%x:%x:%d", &base, &ctl, &irq) != 3) - return -EINVAL; - - memset(&hw, 0, sizeof(hw)); - ide_std_init_ports(&hw, base, ctl); - hw.irq = irq; - hw.chipset = ide_generic; - - rc = ide_host_add(&ide_generic_port_info, hws, NULL); - if (rc) - return rc; - - return n; -}; - -static struct class_attribute ide_generic_class_attrs[] = { - __ATTR(add, S_IWUSR, NULL, store_add), - __ATTR_NULL -}; - -static void ide_generic_class_release(struct class *cls) -{ - kfree(cls); -} - -static int __init ide_generic_sysfs_init(void) -{ - struct class *cls; - int rc; - - cls = kzalloc(sizeof(*cls), GFP_KERNEL); - if (!cls) - return -ENOMEM; - - cls->name = DRV_NAME; - cls->owner = THIS_MODULE; - cls->class_release = ide_generic_class_release; - cls->class_attrs = ide_generic_class_attrs; - - rc = class_register(cls); - if (rc) { - kfree(cls); - return rc; - } - - return 0; -} - -#if defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_MAPPI2) \ - || defined(CONFIG_PLAT_OPSPUT) +#ifdef CONFIG_ARM +static const u16 legacy_bases[] = { 0x1f0 }; +static const int legacy_irqs[] = { IRQ_HARDDISK }; +#elif defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_MAPPI2) || \ + defined(CONFIG_PLAT_OPSPUT) static const u16 legacy_bases[] = { 0x1f0 }; static const int legacy_irqs[] = { PLD_IRQ_CFIREQ }; #elif defined(CONFIG_PLAT_MAPPI3) @@ -107,11 +51,11 @@ static const int legacy_irqs[] = { 14, 15, 11, 10, 8, 12 }; static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary) { +#ifdef CONFIG_PCI struct pci_dev *p = NULL; u16 val; for_each_pci_dev(p) { - if (pci_resource_start(p, 0) == 0x1f0) *primary = 1; if (pci_resource_start(p, 2) == 0x170) @@ -126,7 +70,6 @@ static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary) /* Intel MPIIX - PIO ATA on non PCI side of bridge */ if (p->vendor == PCI_VENDOR_ID_INTEL && p->device == PCI_DEVICE_ID_INTEL_82371MX) { - pci_read_config_word(p, 0x6C, &val); if (val & 0x8000) { /* ATA port enabled */ @@ -137,6 +80,7 @@ static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary) } } } +#endif } static int __init ide_generic_init(void) @@ -168,6 +112,7 @@ static int __init ide_generic_init(void) printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX " "not free.\n", DRV_NAME, io_addr, io_addr + 7); + rc = -EBUSY; continue; } @@ -176,6 +121,7 @@ static int __init ide_generic_init(void) "not free.\n", DRV_NAME, io_addr + 0x206); release_region(io_addr, 8); + rc = -EBUSY; continue; } @@ -196,10 +142,6 @@ static int __init ide_generic_init(void) } } - if (ide_generic_sysfs_init()) - printk(KERN_ERR DRV_NAME ": failed to create ide_generic " - "class\n"); - return rc; } diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c index ff8339ed59ab..dac9a6d44963 100644 --- a/drivers/ide/ide-h8300.c +++ b/drivers/ide/ide-h8300.c @@ -54,9 +54,6 @@ static void h8300_tf_load(ide_drive_t *drive, struct ide_cmd *cmd) if (cmd->ftf_flags & IDE_FTFLAG_FLAGGED) HIHI = 0xFF; - if (cmd->ftf_flags & IDE_FTFLAG_OUT_DATA) - mm_outw((tf->hob_data << 8) | tf->data, io_ports->data_addr); - if (cmd->tf_flags & IDE_TFLAG_OUT_HOB_FEATURE) outb(tf->hob_feature, io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_OUT_HOB_NSECT) @@ -90,18 +87,11 @@ static void h8300_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) struct ide_io_ports *io_ports = &hwif->io_ports; struct ide_taskfile *tf = &cmd->tf; - if (cmd->ftf_flags & IDE_FTFLAG_IN_DATA) { - u16 data = mm_inw(io_ports->data_addr); - - tf->data = data & 0xff; - tf->hob_data = (data >> 8) & 0xff; - } - /* be sure we're looking at the low order bits */ - outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr); + outb(ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_FEATURE) - tf->feature = inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_ERROR) + tf->error = inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_NSECT) tf->nsect = inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_LBAL) @@ -114,18 +104,18 @@ static void h8300_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) tf->device = inb(io_ports->device_addr); if (cmd->tf_flags & IDE_TFLAG_LBA48) { - outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr); + outb(ATA_HOB | ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) - tf->hob_feature = inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_HOB_ERROR) + tf->hob_error = inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_NSECT) - tf->hob_nsect = inb(io_ports->nsect_addr); + tf->hob_nsect = inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAL) - tf->hob_lbal = inb(io_ports->lbal_addr); + tf->hob_lbal = inb(io_ports->lbal_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAM) - tf->hob_lbam = inb(io_ports->lbam_addr); + tf->hob_lbam = inb(io_ports->lbam_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAH) - tf->hob_lbah = inb(io_ports->lbah_addr); + tf->hob_lbah = inb(io_ports->lbah_addr); } } @@ -159,9 +149,9 @@ static const struct ide_tp_ops h8300_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, - .set_irq = ide_set_irq, - + .dev_select = ide_dev_select, .tf_load = h8300_tf_load, .tf_read = h8300_tf_read, diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c index 2d9c6dc3f956..9cac281d82c4 100644 --- a/drivers/ide/ide-io-std.c +++ b/drivers/ide/ide-io-std.c @@ -64,23 +64,26 @@ u8 ide_read_altstatus(ide_hwif_t *hwif) } EXPORT_SYMBOL_GPL(ide_read_altstatus); -void ide_set_irq(ide_hwif_t *hwif, int on) +void ide_write_devctl(ide_hwif_t *hwif, u8 ctl) { - u8 ctl = ATA_DEVCTL_OBS; - - if (on == 4) { /* hack for SRST */ - ctl |= 4; - on &= ~4; - } - - ctl |= on ? 0 : 2; - if (hwif->host_flags & IDE_HFLAG_MMIO) writeb(ctl, (void __iomem *)hwif->io_ports.ctl_addr); else outb(ctl, hwif->io_ports.ctl_addr); } -EXPORT_SYMBOL_GPL(ide_set_irq); +EXPORT_SYMBOL_GPL(ide_write_devctl); + +void ide_dev_select(ide_drive_t *drive) +{ + ide_hwif_t *hwif = drive->hwif; + u8 select = drive->select | ATA_DEVICE_OBS; + + if (hwif->host_flags & IDE_HFLAG_MMIO) + writeb(select, (void __iomem *)hwif->io_ports.device_addr); + else + outb(select, hwif->io_ports.device_addr); +} +EXPORT_SYMBOL_GPL(ide_dev_select); void ide_tf_load(ide_drive_t *drive, struct ide_cmd *cmd) { @@ -99,15 +102,6 @@ void ide_tf_load(ide_drive_t *drive, struct ide_cmd *cmd) if (cmd->ftf_flags & IDE_FTFLAG_FLAGGED) HIHI = 0xFF; - if (cmd->ftf_flags & IDE_FTFLAG_OUT_DATA) { - u16 data = (tf->hob_data << 8) | tf->data; - - if (mmio) - writew(data, (void __iomem *)io_ports->data_addr); - else - outw(data, io_ports->data_addr); - } - if (cmd->tf_flags & IDE_TFLAG_OUT_HOB_FEATURE) tf_outb(tf->hob_feature, io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_OUT_HOB_NSECT) @@ -153,23 +147,11 @@ void ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) tf_inb = ide_inb; } - if (cmd->ftf_flags & IDE_FTFLAG_IN_DATA) { - u16 data; - - if (mmio) - data = readw((void __iomem *)io_ports->data_addr); - else - data = inw(io_ports->data_addr); - - tf->data = data & 0xff; - tf->hob_data = (data >> 8) & 0xff; - } - /* be sure we're looking at the low order bits */ - tf_outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr); + tf_outb(ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_FEATURE) - tf->feature = tf_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_ERROR) + tf->error = tf_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_NSECT) tf->nsect = tf_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_LBAL) @@ -182,18 +164,18 @@ void ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) tf->device = tf_inb(io_ports->device_addr); if (cmd->tf_flags & IDE_TFLAG_LBA48) { - tf_outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr); + tf_outb(ATA_HOB | ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) - tf->hob_feature = tf_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_HOB_ERROR) + tf->hob_error = tf_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_NSECT) - tf->hob_nsect = tf_inb(io_ports->nsect_addr); + tf->hob_nsect = tf_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAL) - tf->hob_lbal = tf_inb(io_ports->lbal_addr); + tf->hob_lbal = tf_inb(io_ports->lbal_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAM) - tf->hob_lbam = tf_inb(io_ports->lbam_addr); + tf->hob_lbam = tf_inb(io_ports->lbam_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAH) - tf->hob_lbah = tf_inb(io_ports->lbah_addr); + tf->hob_lbah = tf_inb(io_ports->lbah_addr); } } EXPORT_SYMBOL_GPL(ide_tf_read); @@ -225,11 +207,10 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, ide_hwif_t *hwif = drive->hwif; struct ide_io_ports *io_ports = &hwif->io_ports; unsigned long data_addr = io_ports->data_addr; + unsigned int words = (len + 1) >> 1; u8 io_32bit = drive->io_32bit; u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0; - len++; - if (io_32bit) { unsigned long uninitialized_var(flags); @@ -238,27 +219,26 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, ata_vlb_sync(io_ports->nsect_addr); } + words >>= 1; if (mmio) - __ide_mm_insl((void __iomem *)data_addr, buf, len / 4); + __ide_mm_insl((void __iomem *)data_addr, buf, words); else - insl(data_addr, buf, len / 4); + insl(data_addr, buf, words); if ((io_32bit & 2) && !mmio) local_irq_restore(flags); - if ((len & 3) >= 2) { - if (mmio) - __ide_mm_insw((void __iomem *)data_addr, - (u8 *)buf + (len & ~3), 1); - else - insw(data_addr, (u8 *)buf + (len & ~3), 1); - } - } else { - if (mmio) - __ide_mm_insw((void __iomem *)data_addr, buf, len / 2); - else - insw(data_addr, buf, len / 2); + if (((len + 1) & 3) < 2) + return; + + buf += len & ~3; + words = 1; } + + if (mmio) + __ide_mm_insw((void __iomem *)data_addr, buf, words); + else + insw(data_addr, buf, words); } EXPORT_SYMBOL_GPL(ide_input_data); @@ -271,11 +251,10 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, ide_hwif_t *hwif = drive->hwif; struct ide_io_ports *io_ports = &hwif->io_ports; unsigned long data_addr = io_ports->data_addr; + unsigned int words = (len + 1) >> 1; u8 io_32bit = drive->io_32bit; u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0; - len++; - if (io_32bit) { unsigned long uninitialized_var(flags); @@ -284,27 +263,26 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, ata_vlb_sync(io_ports->nsect_addr); } + words >>= 1; if (mmio) - __ide_mm_outsl((void __iomem *)data_addr, buf, len / 4); + __ide_mm_outsl((void __iomem *)data_addr, buf, words); else - outsl(data_addr, buf, len / 4); + outsl(data_addr, buf, words); if ((io_32bit & 2) && !mmio) local_irq_restore(flags); - if ((len & 3) >= 2) { - if (mmio) - __ide_mm_outsw((void __iomem *)data_addr, - (u8 *)buf + (len & ~3), 1); - else - outsw(data_addr, (u8 *)buf + (len & ~3), 1); - } - } else { - if (mmio) - __ide_mm_outsw((void __iomem *)data_addr, buf, len / 2); - else - outsw(data_addr, buf, len / 2); + if (((len + 1) & 3) < 2) + return; + + buf += len & ~3; + words = 1; } + + if (mmio) + __ide_mm_outsw((void __iomem *)data_addr, buf, words); + else + outsw(data_addr, buf, words); } EXPORT_SYMBOL_GPL(ide_output_data); @@ -312,9 +290,9 @@ const struct ide_tp_ops default_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, - .set_irq = ide_set_irq, - + .dev_select = ide_dev_select, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 1adc5e2e7fb3..1deb6d29b186 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -73,6 +73,7 @@ EXPORT_SYMBOL_GPL(ide_end_rq); void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err) { + const struct ide_tp_ops *tp_ops = drive->hwif->tp_ops; struct ide_taskfile *tf = &cmd->tf; struct request *rq = cmd->rq; u8 tf_cmd = tf->command; @@ -80,7 +81,16 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err) tf->error = err; tf->status = stat; - drive->hwif->tp_ops->tf_read(drive, cmd); + if (cmd->ftf_flags & IDE_FTFLAG_IN_DATA) { + u8 data[2]; + + tp_ops->input_data(drive, cmd, data, 2); + + tf->data = data[0]; + tf->hob_data = data[1]; + } + + tp_ops->tf_read(drive, cmd); if ((cmd->tf_flags & IDE_TFLAG_CUSTOM_HANDLER) && tf_cmd == ATA_CMD_IDLEIMMEDIATE) { @@ -338,7 +348,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) if (blk_pm_request(rq)) ide_check_pm_state(drive, rq); - SELECT_DRIVE(drive); + drive->hwif->tp_ops->dev_select(drive); if (ide_wait_stat(&startstop, drive, drive->ready_stat, ATA_BUSY | ATA_DRQ, WAIT_READY)) { printk(KERN_ERR "%s: drive not ready for command\n", drive->name); @@ -481,11 +491,10 @@ repeat: prev_port = hwif->host->cur_port; hwif->rq = NULL; - if (drive->dev_flags & IDE_DFLAG_SLEEPING) { - if (time_before(drive->sleep, jiffies)) { - ide_unlock_port(hwif); - goto plug_device; - } + if (drive->dev_flags & IDE_DFLAG_SLEEPING && + time_after(drive->sleep, jiffies)) { + ide_unlock_port(hwif); + goto plug_device; } if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) && @@ -495,7 +504,9 @@ repeat: * quirk_list may not like intr setups/cleanups */ if (prev_port && prev_port->cur_dev->quirk_list == 0) - prev_port->tp_ops->set_irq(prev_port, 0); + prev_port->tp_ops->write_devctl(prev_port, + ATA_NIEN | + ATA_DEVCTL_OBS); hwif->host->cur_port = hwif; } diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 5403e4a44be4..27bb70ddd459 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -27,21 +27,6 @@ #include <asm/uaccess.h> #include <asm/io.h> -void SELECT_DRIVE(ide_drive_t *drive) -{ - ide_hwif_t *hwif = drive->hwif; - const struct ide_port_ops *port_ops = hwif->port_ops; - struct ide_cmd cmd; - - if (port_ops && port_ops->selectproc) - port_ops->selectproc(drive); - - memset(&cmd, 0, sizeof(cmd)); - cmd.tf_flags = IDE_TFLAG_OUT_DEVICE; - - drive->hwif->tp_ops->tf_load(drive, &cmd); -} - void SELECT_MASK(ide_drive_t *drive, int mask) { const struct ide_port_ops *port_ops = drive->hwif->port_ops; @@ -55,7 +40,7 @@ u8 ide_read_error(ide_drive_t *drive) struct ide_cmd cmd; memset(&cmd, 0, sizeof(cmd)); - cmd.tf_flags = IDE_TFLAG_IN_FEATURE; + cmd.tf_flags = IDE_TFLAG_IN_ERROR; drive->hwif->tp_ops->tf_read(drive, &cmd); @@ -306,6 +291,7 @@ int ide_driveid_update(ide_drive_t *drive) drive->id[ATA_ID_UDMA_MODES] = id[ATA_ID_UDMA_MODES]; drive->id[ATA_ID_MWDMA_MODES] = id[ATA_ID_MWDMA_MODES]; drive->id[ATA_ID_SWDMA_MODES] = id[ATA_ID_SWDMA_MODES]; + drive->id[ATA_ID_CFA_MODES] = id[ATA_ID_CFA_MODES]; /* anything more ? */ kfree(id); @@ -356,10 +342,10 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) disable_irq_nosync(hwif->irq); udelay(1); - SELECT_DRIVE(drive); + tp_ops->dev_select(drive); SELECT_MASK(drive, 1); udelay(1); - tp_ops->set_irq(hwif, 0); + tp_ops->write_devctl(hwif, ATA_NIEN | ATA_DEVCTL_OBS); memset(&cmd, 0, sizeof(cmd)); cmd.tf_flags = IDE_TFLAG_OUT_FEATURE | IDE_TFLAG_OUT_NSECT; @@ -371,7 +357,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) tp_ops->exec_command(hwif, ATA_CMD_SET_FEATURES); if (drive->quirk_list == 2) - tp_ops->set_irq(hwif, 1); + tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); error = __ide_wait_stat(drive, drive->ready_stat, ATA_BUSY | ATA_DRQ | ATA_ERR, @@ -386,9 +372,14 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) return error; } - id[ATA_ID_UDMA_MODES] &= ~0xFF00; - id[ATA_ID_MWDMA_MODES] &= ~0x0F00; - id[ATA_ID_SWDMA_MODES] &= ~0x0F00; + if (speed >= XFER_SW_DMA_0) { + id[ATA_ID_UDMA_MODES] &= ~0xFF00; + id[ATA_ID_MWDMA_MODES] &= ~0x0700; + id[ATA_ID_SWDMA_MODES] &= ~0x0700; + if (ata_id_is_cfa(id)) + id[ATA_ID_CFA_MODES] &= ~0x0E00; + } else if (ata_id_is_cfa(id)) + id[ATA_ID_CFA_MODES] &= ~0x01C0; skip: #ifdef CONFIG_BLK_DEV_IDEDMA @@ -401,12 +392,18 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) if (speed >= XFER_UDMA_0) { i = 1 << (speed - XFER_UDMA_0); id[ATA_ID_UDMA_MODES] |= (i << 8 | i); + } else if (ata_id_is_cfa(id) && speed >= XFER_MW_DMA_3) { + i = speed - XFER_MW_DMA_2; + id[ATA_ID_CFA_MODES] |= i << 9; } else if (speed >= XFER_MW_DMA_0) { i = 1 << (speed - XFER_MW_DMA_0); id[ATA_ID_MWDMA_MODES] |= (i << 8 | i); } else if (speed >= XFER_SW_DMA_0) { i = 1 << (speed - XFER_SW_DMA_0); id[ATA_ID_SWDMA_MODES] |= (i << 8 | i); + } else if (ata_id_is_cfa(id) && speed >= XFER_PIO_5) { + i = speed - XFER_PIO_4; + id[ATA_ID_CFA_MODES] |= i << 6; } if (!drive->init_speed) diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index ebf2d21ebdcb..bb7858ebb7d1 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -223,6 +223,7 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq) * point. */ ide_hwif_t *hwif = drive->hwif; + const struct ide_tp_ops *tp_ops = hwif->tp_ops; struct request_queue *q = drive->queue; unsigned long flags; int rc; @@ -232,8 +233,8 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq) rc = ide_wait_not_busy(hwif, 35000); if (rc) printk(KERN_WARNING "%s: bus not ready on wakeup\n", drive->name); - SELECT_DRIVE(drive); - hwif->tp_ops->set_irq(hwif, 1); + tp_ops->dev_select(drive); + tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); rc = ide_wait_not_busy(hwif, 100000); if (rc) printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name); diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 548864510ba9..d8c1c3e735bb 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -260,7 +260,7 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) * during the identify phase that the IRQ handler isn't expecting. */ if (io_ports->ctl_addr) - tp_ops->set_irq(hwif, 0); + tp_ops->write_devctl(hwif, ATA_NIEN | ATA_DEVCTL_OBS); /* take a deep breath */ msleep(50); @@ -390,13 +390,13 @@ static int do_probe (ide_drive_t *drive, u8 cmd) * (e.g. crw9624 as drive0 with disk as slave) */ msleep(50); - SELECT_DRIVE(drive); + tp_ops->dev_select(drive); msleep(50); if (ide_read_device(drive) != drive->select && present == 0) { if (drive->dn & 1) { /* exit with drive0 selected */ - SELECT_DRIVE(hwif->devices[0]); + tp_ops->dev_select(hwif->devices[0]); /* allow ATA_BUSY to assert & clear */ msleep(50); } @@ -422,7 +422,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) printk(KERN_ERR "%s: no response (status = 0x%02x), " "resetting drive\n", drive->name, stat); msleep(50); - SELECT_DRIVE(drive); + tp_ops->dev_select(drive); msleep(50); tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); (void)ide_busy_sleep(hwif, WAIT_WORSTCASE, 0); @@ -441,7 +441,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) } if (drive->dn & 1) { /* exit with drive0 selected */ - SELECT_DRIVE(hwif->devices[0]); + tp_ops->dev_select(hwif->devices[0]); msleep(50); /* ensure drive irq is clear */ (void)tp_ops->read_status(hwif); @@ -605,6 +605,7 @@ out: static int ide_port_wait_ready(ide_hwif_t *hwif) { + const struct ide_tp_ops *tp_ops = hwif->tp_ops; ide_drive_t *drive; int i, rc; @@ -627,8 +628,8 @@ static int ide_port_wait_ready(ide_hwif_t *hwif) /* Ignore disks that we will not probe for later. */ if ((drive->dev_flags & IDE_DFLAG_NOPROBE) == 0 || (drive->dev_flags & IDE_DFLAG_PRESENT)) { - SELECT_DRIVE(drive); - hwif->tp_ops->set_irq(hwif, 1); + tp_ops->dev_select(drive); + tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); mdelay(2); rc = ide_wait_not_busy(hwif, 35000); if (rc) @@ -640,7 +641,7 @@ static int ide_port_wait_ready(ide_hwif_t *hwif) out: /* Exit function with master reselected (let's be sane) */ if (i) - SELECT_DRIVE(hwif->devices[0]); + tp_ops->dev_select(hwif->devices[0]); return rc; } @@ -845,7 +846,7 @@ static int init_irq (ide_hwif_t *hwif) irq_handler = ide_intr; if (io_ports->ctl_addr) - hwif->tp_ops->set_irq(hwif, 1); + hwif->tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); if (request_irq(hwif->irq, irq_handler, sa, hwif->name, hwif)) goto out_up; @@ -942,20 +943,16 @@ EXPORT_SYMBOL_GPL(ide_init_disk); static void drive_release_dev (struct device *dev) { ide_drive_t *drive = container_of(dev, ide_drive_t, gendev); - ide_hwif_t *hwif = drive->hwif; ide_proc_unregister_device(drive); - spin_lock_irq(&hwif->lock); + blk_cleanup_queue(drive->queue); + drive->queue = NULL; + kfree(drive->id); drive->id = NULL; + drive->dev_flags &= ~IDE_DFLAG_PRESENT; - /* Messed up locking ... */ - spin_unlock_irq(&hwif->lock); - blk_cleanup_queue(drive->queue); - spin_lock_irq(&hwif->lock); - drive->queue = NULL; - spin_unlock_irq(&hwif->lock); complete(&drive->gendev_rel_comp); } diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 64dfa7458f8d..cb942a9b580f 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -297,19 +297,15 @@ static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i) return tape; } -static void idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, +static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, unsigned int bcount) { struct idetape_bh *bh = pc->bh; int count; while (bcount) { - if (bh == NULL) { - printk(KERN_ERR "ide-tape: bh == NULL in " - "idetape_input_buffers\n"); - ide_pad_transfer(drive, 0, bcount); - return; - } + if (bh == NULL) + break; count = min( (unsigned int)(bh->b_size - atomic_read(&bh->b_count)), bcount); @@ -323,21 +319,21 @@ static void idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, atomic_set(&bh->b_count, 0); } } + pc->bh = bh; + + return bcount; } -static void idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, +static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, unsigned int bcount) { struct idetape_bh *bh = pc->bh; int count; while (bcount) { - if (bh == NULL) { - printk(KERN_ERR "ide-tape: bh == NULL in %s\n", - __func__); - return; - } + if (bh == NULL) + break; count = min((unsigned int)pc->b_count, (unsigned int)bcount); drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count); bcount -= count; @@ -352,6 +348,8 @@ static void idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, } } } + + return bcount; } static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc) @@ -563,12 +561,14 @@ static void ide_tape_handle_dsc(ide_drive_t *drive) static int ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, unsigned int bcount, int write) { + unsigned int bleft; + if (write) - idetape_output_buffers(drive, pc, bcount); + bleft = idetape_output_buffers(drive, pc, bcount); else - idetape_input_buffers(drive, pc, bcount); + bleft = idetape_input_buffers(drive, pc, bcount); - return bcount; + return bcount - bleft; } /* @@ -2014,9 +2014,13 @@ static void idetape_get_inquiry_results(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; struct ide_atapi_pc pc; + u8 pc_buf[256]; char fw_rev[4], vendor_id[8], product_id[16]; idetape_create_inquiry_cmd(&pc); + pc.buf = &pc_buf[0]; + pc.buf_size = sizeof(pc_buf); + if (ide_queue_pc_tail(drive, tape->disk, &pc)) { printk(KERN_ERR "ide-tape: %s: can't get INQUIRY results\n", tape->name); diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 84532be97c00..243421ce40d0 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -80,8 +80,14 @@ ide_startstop_t do_rw_taskfile(ide_drive_t *drive, struct ide_cmd *orig_cmd) if ((cmd->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) { ide_tf_dump(drive->name, tf); - tp_ops->set_irq(hwif, 1); + tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); SELECT_MASK(drive, 0); + + if (cmd->ftf_flags & IDE_FTFLAG_OUT_DATA) { + u8 data[2] = { tf->data, tf->hob_data }; + + tp_ops->output_data(drive, cmd, data, 2); + } tp_ops->tf_load(drive, cmd); } @@ -100,9 +106,7 @@ ide_startstop_t do_rw_taskfile(ide_drive_t *drive, struct ide_cmd *orig_cmd) ide_execute_command(drive, cmd, handler, WAIT_WORSTCASE); return ide_started; case ATA_PROT_DMA: - if ((drive->dev_flags & IDE_DFLAG_USING_DMA) == 0 || - ide_build_sglist(drive, cmd) == 0 || - dma_ops->dma_setup(drive, cmd)) + if (ide_dma_prepare(drive, cmd)) return ide_stopped; hwif->expiry = dma_ops->dma_timer_expiry; ide_execute_command(drive, cmd, ide_dma_intr, 2 * WAIT_CMD); @@ -188,70 +192,68 @@ static u8 wait_drive_not_busy(ide_drive_t *drive) return stat; } -static void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, - unsigned int write, unsigned int nr_bytes) +void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, + unsigned int write, unsigned int len) { ide_hwif_t *hwif = drive->hwif; struct scatterlist *sg = hwif->sg_table; struct scatterlist *cursg = cmd->cursg; struct page *page; -#ifdef CONFIG_HIGHMEM unsigned long flags; -#endif unsigned int offset; u8 *buf; cursg = cmd->cursg; - if (!cursg) { - cursg = sg; - cmd->cursg = sg; - } + if (cursg == NULL) + cursg = cmd->cursg = sg; - page = sg_page(cursg); - offset = cursg->offset + cmd->cursg_ofs; + while (len) { + unsigned nr_bytes = min(len, cursg->length - cmd->cursg_ofs); - /* get the current page and offset */ - page = nth_page(page, (offset >> PAGE_SHIFT)); - offset %= PAGE_SIZE; + if (nr_bytes > PAGE_SIZE) + nr_bytes = PAGE_SIZE; -#ifdef CONFIG_HIGHMEM - local_irq_save(flags); -#endif - buf = kmap_atomic(page, KM_BIO_SRC_IRQ) + offset; + page = sg_page(cursg); + offset = cursg->offset + cmd->cursg_ofs; - cmd->nleft -= nr_bytes; - cmd->cursg_ofs += nr_bytes; + /* get the current page and offset */ + page = nth_page(page, (offset >> PAGE_SHIFT)); + offset %= PAGE_SIZE; - if (cmd->cursg_ofs == cursg->length) { - cmd->cursg = sg_next(cmd->cursg); - cmd->cursg_ofs = 0; - } + if (PageHighMem(page)) + local_irq_save(flags); - /* do the actual data transfer */ - if (write) - hwif->tp_ops->output_data(drive, cmd, buf, nr_bytes); - else - hwif->tp_ops->input_data(drive, cmd, buf, nr_bytes); + buf = kmap_atomic(page, KM_BIO_SRC_IRQ) + offset; - kunmap_atomic(buf, KM_BIO_SRC_IRQ); -#ifdef CONFIG_HIGHMEM - local_irq_restore(flags); -#endif -} + cmd->nleft -= nr_bytes; + cmd->cursg_ofs += nr_bytes; -static void ide_pio_multi(ide_drive_t *drive, struct ide_cmd *cmd, - unsigned int write) -{ - unsigned int nsect; + if (cmd->cursg_ofs == cursg->length) { + cursg = cmd->cursg = sg_next(cmd->cursg); + cmd->cursg_ofs = 0; + } + + /* do the actual data transfer */ + if (write) + hwif->tp_ops->output_data(drive, cmd, buf, nr_bytes); + else + hwif->tp_ops->input_data(drive, cmd, buf, nr_bytes); + + kunmap_atomic(buf, KM_BIO_SRC_IRQ); - nsect = min_t(unsigned int, cmd->nleft >> 9, drive->mult_count); - while (nsect--) - ide_pio_bytes(drive, cmd, write, SECTOR_SIZE); + if (PageHighMem(page)) + local_irq_restore(flags); + + len -= nr_bytes; + } } +EXPORT_SYMBOL_GPL(ide_pio_bytes); static void ide_pio_datablock(ide_drive_t *drive, struct ide_cmd *cmd, unsigned int write) { + unsigned int nr_bytes; + u8 saved_io_32bit = drive->io_32bit; if (cmd->tf_flags & IDE_TFLAG_FS) @@ -263,9 +265,11 @@ static void ide_pio_datablock(ide_drive_t *drive, struct ide_cmd *cmd, touch_softlockup_watchdog(); if (cmd->tf_flags & IDE_TFLAG_MULTI_PIO) - ide_pio_multi(drive, cmd, write); + nr_bytes = min_t(unsigned, cmd->nleft, drive->mult_count << 9); else - ide_pio_bytes(drive, cmd, write, SECTOR_SIZE); + nr_bytes = SECTOR_SIZE; + + ide_pio_bytes(drive, cmd, write, nr_bytes); drive->io_32bit = saved_io_32bit; } diff --git a/drivers/ide/ide-timings.c b/drivers/ide/ide-timings.c index 81f527af8fae..001a56365be5 100644 --- a/drivers/ide/ide-timings.c +++ b/drivers/ide/ide-timings.c @@ -43,6 +43,8 @@ static struct ide_timing ide_timing[] = { { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 80 }, { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 120 }, + { XFER_MW_DMA_4, 25, 0, 0, 0, 55, 20, 80, 0 }, + { XFER_MW_DMA_3, 25, 0, 0, 0, 65, 25, 100, 0 }, { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 120, 0 }, { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 150, 0 }, { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 480, 0 }, @@ -51,7 +53,8 @@ static struct ide_timing ide_timing[] = { { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 480, 0 }, { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 960, 0 }, - { XFER_PIO_5, 20, 50, 30, 100, 50, 30, 100, 0 }, + { XFER_PIO_6, 10, 55, 20, 80, 55, 20, 80, 0 }, + { XFER_PIO_5, 15, 65, 25, 100, 65, 25, 100, 0 }, { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 120, 0 }, { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 180, 0 }, @@ -90,6 +93,10 @@ u16 ide_pio_cycle_time(ide_drive_t *drive, u8 pio) /* conservative "downgrade" for all pre-ATA2 drives */ if (pio < 3 && cycle < t->cycle) cycle = 0; /* use standard timing */ + + /* Use the standard timing for the CF specific modes too */ + if (pio > 4 && ata_id_is_cfa(id)) + cycle = 0; } return cycle ? cycle : t->cycle; @@ -161,7 +168,8 @@ int ide_timing_compute(ide_drive_t *drive, u8 speed, if (speed <= XFER_PIO_2) p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO]; - else if (speed <= XFER_PIO_5) + else if ((speed <= XFER_PIO_4) || + (speed == XFER_PIO_5 && !ata_id_is_cfa(id))) p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO_IORDY]; else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2) p.cycle = id[ATA_ID_EIDE_DMA_MIN]; diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c index 6910f6a257e8..af44be9d546c 100644 --- a/drivers/ide/ide-xfer-mode.c +++ b/drivers/ide/ide-xfer-mode.c @@ -9,11 +9,11 @@ static const char *udma_str[] = { "UDMA/16", "UDMA/25", "UDMA/33", "UDMA/44", "UDMA/66", "UDMA/100", "UDMA/133", "UDMA7" }; static const char *mwdma_str[] = - { "MWDMA0", "MWDMA1", "MWDMA2" }; + { "MWDMA0", "MWDMA1", "MWDMA2", "MWDMA3", "MWDMA4" }; static const char *swdma_str[] = { "SWDMA0", "SWDMA1", "SWDMA2" }; static const char *pio_str[] = - { "PIO0", "PIO1", "PIO2", "PIO3", "PIO4", "PIO5" }; + { "PIO0", "PIO1", "PIO2", "PIO3", "PIO4", "PIO5", "PIO6" }; /** * ide_xfer_verbose - return IDE mode names @@ -30,11 +30,11 @@ const char *ide_xfer_verbose(u8 mode) if (mode >= XFER_UDMA_0 && mode <= XFER_UDMA_7) s = udma_str[i]; - else if (mode >= XFER_MW_DMA_0 && mode <= XFER_MW_DMA_2) + else if (mode >= XFER_MW_DMA_0 && mode <= XFER_MW_DMA_4) s = mwdma_str[i]; else if (mode >= XFER_SW_DMA_0 && mode <= XFER_SW_DMA_2) s = swdma_str[i]; - else if (mode >= XFER_PIO_0 && mode <= XFER_PIO_5) + else if (mode >= XFER_PIO_0 && mode <= XFER_PIO_6) s = pio_str[i & 0x7]; else if (mode == XFER_PIO_SLOW) s = "PIO SLOW"; @@ -79,7 +79,10 @@ u8 ide_get_best_pio_mode(ide_drive_t *drive, u8 mode_wanted, u8 max_mode) } if (id[ATA_ID_FIELD_VALID] & 2) { /* ATA2? */ - if (ata_id_has_iordy(id)) { + if (ata_id_is_cfa(id) && (id[ATA_ID_CFA_MODES] & 7)) + pio_mode = 4 + min_t(int, 2, + id[ATA_ID_CFA_MODES] & 7); + else if (ata_id_has_iordy(id)) { if (id[ATA_ID_PIO_MODES] & 7) { overridden = 0; if (id[ATA_ID_PIO_MODES] & 4) @@ -239,7 +242,7 @@ int ide_set_xfer_rate(ide_drive_t *drive, u8 rate) BUG_ON(rate < XFER_PIO_0); - if (rate >= XFER_PIO_0 && rate <= XFER_PIO_5) + if (rate >= XFER_PIO_0 && rate <= XFER_PIO_6) return ide_set_pio_mode(drive, rate); return ide_set_dma_mode(drive, rate); diff --git a/drivers/ide/ide_arm.c b/drivers/ide/ide_arm.c deleted file mode 100644 index cf6385446ece..000000000000 --- a/drivers/ide/ide_arm.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * ARM default IDE host driver - * - * Copyright (C) 2004 Bartlomiej Zolnierkiewicz - * Based on code by: Russell King, Ian Molton and Alexander Schulz. - * - * May be copied or modified under the terms of the GNU General Public License. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/ide.h> - -#include <asm/irq.h> - -#define DRV_NAME "ide_arm" - -#define IDE_ARM_IO 0x1f0 -#define IDE_ARM_IRQ IRQ_HARDDISK - -static const struct ide_port_info ide_arm_port_info = { - .host_flags = IDE_HFLAG_NO_DMA, -}; - -static int __init ide_arm_init(void) -{ - unsigned long base = IDE_ARM_IO, ctl = IDE_ARM_IO + 0x206; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; - - if (!request_region(base, 8, DRV_NAME)) { - printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n", - DRV_NAME, base, base + 7); - return -EBUSY; - } - - if (!request_region(ctl, 1, DRV_NAME)) { - printk(KERN_ERR "%s: I/O resource 0x%lX not free.\n", - DRV_NAME, ctl); - release_region(base, 8); - return -EBUSY; - } - - memset(&hw, 0, sizeof(hw)); - ide_std_init_ports(&hw, base, ctl); - hw.irq = IDE_ARM_IRQ; - hw.chipset = ide_generic; - - return ide_host_add(&ide_arm_port_info, hws, NULL); -} - -module_init(ide_arm_init); - -MODULE_LICENSE("GPL"); diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c index 0d4ac65cf949..51aa745246dc 100644 --- a/drivers/ide/it821x.c +++ b/drivers/ide/it821x.c @@ -511,9 +511,8 @@ static struct ide_dma_ops it821x_pass_through_dma_ops = { .dma_start = it821x_dma_start, .dma_end = it821x_dma_end, .dma_test_irq = ide_dma_test_irq, - .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_lost_irq = ide_dma_lost_irq, + .dma_timer_expiry = ide_dma_sff_timer_expiry, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/ns87415.c b/drivers/ide/ns87415.c index 7b65fe5bf449..71a39fb3856f 100644 --- a/drivers/ide/ns87415.c +++ b/drivers/ide/ns87415.c @@ -66,18 +66,11 @@ static void superio_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) struct ide_io_ports *io_ports = &drive->hwif->io_ports; struct ide_taskfile *tf = &cmd->tf; - if (cmd->ftf_flags & IDE_FTFLAG_IN_DATA) { - u16 data = inw(io_ports->data_addr); - - tf->data = data & 0xff; - tf->hob_data = (data >> 8) & 0xff; - } - /* be sure we're looking at the low order bits */ - outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr); + outb(ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_FEATURE) - tf->feature = inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_ERROR) + tf->error = inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_NSECT) tf->nsect = inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_LBAL) @@ -90,28 +83,30 @@ static void superio_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) tf->device = superio_ide_inb(io_ports->device_addr); if (cmd->tf_flags & IDE_TFLAG_LBA48) { - outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr); + outb(ATA_HOB | ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) - tf->hob_feature = inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_HOB_ERROR) + tf->hob_error = inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_NSECT) - tf->hob_nsect = inb(io_ports->nsect_addr); + tf->hob_nsect = inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAL) - tf->hob_lbal = inb(io_ports->lbal_addr); + tf->hob_lbal = inb(io_ports->lbal_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAM) - tf->hob_lbam = inb(io_ports->lbam_addr); + tf->hob_lbam = inb(io_ports->lbam_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAH) - tf->hob_lbah = inb(io_ports->lbah_addr); + tf->hob_lbah = inb(io_ports->lbah_addr); } } +static void ns87415_dev_select(ide_drive_t *drive); + static const struct ide_tp_ops superio_tp_ops = { .exec_command = ide_exec_command, .read_status = superio_read_status, .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, - .set_irq = ide_set_irq, - + .dev_select = ns87415_dev_select, .tf_load = ide_tf_load, .tf_read = superio_tf_read, @@ -190,10 +185,18 @@ static void ns87415_prepare_drive (ide_drive_t *drive, unsigned int use_dma) local_irq_restore(flags); } -static void ns87415_selectproc (ide_drive_t *drive) +static void ns87415_dev_select(ide_drive_t *drive) { ns87415_prepare_drive(drive, !!(drive->dev_flags & IDE_DFLAG_USING_DMA)); + + outb(drive->select | ATA_DEVICE_OBS, drive->hwif->io_ports.device_addr); +} + +static void ns87415_dma_start(ide_drive_t *drive) +{ + ns87415_prepare_drive(drive, 1); + ide_dma_start(drive); } static int ns87415_dma_end(ide_drive_t *drive) @@ -201,7 +204,6 @@ static int ns87415_dma_end(ide_drive_t *drive) ide_hwif_t *hwif = drive->hwif; u8 dma_stat = 0, dma_cmd = 0; - drive->waiting_for_dma = 0; dma_stat = hwif->dma_ops->dma_sff_read_status(hwif); /* get DMA command mode */ dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD); @@ -210,21 +212,11 @@ static int ns87415_dma_end(ide_drive_t *drive) /* from ERRATA: clear the INTR & ERROR bits */ dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD); outb(dma_cmd | 6, hwif->dma_base + ATA_DMA_CMD); - /* and free any DMA resources */ - ide_destroy_dmatable(drive); - /* verify good DMA status */ - return (dma_stat & 7) != 4; -} -static int ns87415_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) -{ - /* select DMA xfer */ - ns87415_prepare_drive(drive, 1); - if (ide_dma_setup(drive, cmd) == 0) - return 0; - /* DMA failed: select PIO xfer */ ns87415_prepare_drive(drive, 0); - return 1; + + /* verify good DMA status */ + return (dma_stat & 7) != 4; } static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif) @@ -242,7 +234,7 @@ static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif) * Also, leave IRQ masked during drive probing, to prevent infinite * interrupts from a potentially floating INTA.. * - * IRQs get unmasked in selectproc when drive is first used. + * IRQs get unmasked in dev_select() when drive is first used. */ (void) pci_read_config_dword(dev, 0x40, &ctrl); (void) pci_read_config_byte(dev, 0x09, &progif); @@ -270,7 +262,7 @@ static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif) #ifdef __sparc_v9__ /* * XXX: Reset the device, if we don't it will not respond to - * SELECT_DRIVE() properly during first ide_probe_port(). + * dev_select() properly during first ide_probe_port(). */ timeout = 10000; outb(12, hwif->io_ports.ctl_addr); @@ -294,26 +286,35 @@ static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif) outb(0x60, hwif->dma_base + ATA_DMA_STATUS); } -static const struct ide_port_ops ns87415_port_ops = { - .selectproc = ns87415_selectproc, +static const struct ide_tp_ops ns87415_tp_ops = { + .exec_command = ide_exec_command, + .read_status = ide_read_status, + .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, + + .dev_select = ns87415_dev_select, + .tf_load = ide_tf_load, + .tf_read = ide_tf_read, + + .input_data = ide_input_data, + .output_data = ide_output_data, }; static const struct ide_dma_ops ns87415_dma_ops = { .dma_host_set = ide_dma_host_set, - .dma_setup = ns87415_dma_setup, - .dma_start = ide_dma_start, + .dma_setup = ide_dma_setup, + .dma_start = ns87415_dma_start, .dma_end = ns87415_dma_end, .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = superio_dma_sff_read_status, }; static const struct ide_port_info ns87415_chipset __devinitdata = { .name = DRV_NAME, .init_hwif = init_hwif_ns87415, - .port_ops = &ns87415_port_ops, + .tp_ops = &ns87415_tp_ops, .dma_ops = &ns87415_dma_ops, .host_flags = IDE_HFLAG_TRUST_BIOS_FOR_DMA | IDE_HFLAG_NO_ATAPI_DMA, diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c index f7536d1943f7..248a54bd2386 100644 --- a/drivers/ide/pdc202xx_old.c +++ b/drivers/ide/pdc202xx_old.c @@ -258,12 +258,6 @@ static void pdc202xx_dma_lost_irq(ide_drive_t *drive) ide_dma_lost_irq(drive); } -static void pdc202xx_dma_timeout(ide_drive_t *drive) -{ - pdc202xx_reset(drive); - ide_dma_timeout(drive); -} - static int init_chipset_pdc202xx(struct pci_dev *dev) { unsigned long dmabase = pci_resource_start(dev, 4); @@ -336,7 +330,7 @@ static const struct ide_dma_ops pdc20246_dma_ops = { .dma_test_irq = pdc202xx_dma_test_irq, .dma_lost_irq = pdc202xx_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = pdc202xx_dma_timeout, + .dma_clear = pdc202xx_reset, .dma_sff_read_status = ide_dma_sff_read_status, }; @@ -348,7 +342,7 @@ static const struct ide_dma_ops pdc2026x_dma_ops = { .dma_test_irq = pdc202xx_dma_test_irq, .dma_lost_irq = pdc202xx_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = pdc202xx_dma_timeout, + .dma_clear = pdc202xx_reset, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index 2bfcfedaa076..052b9bf1f8fb 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -404,8 +404,6 @@ kauai_lookup_timing(struct kauai_timing* table, int cycle_time) #define IDE_WAKEUP_DELAY (1*HZ) static int pmac_ide_init_dma(ide_hwif_t *, const struct ide_port_info *); -static void pmac_ide_selectproc(ide_drive_t *drive); -static void pmac_ide_kauai_selectproc(ide_drive_t *drive); #define PMAC_IDE_REG(x) \ ((void __iomem *)((drive)->hwif->io_ports.data_addr + (x))) @@ -415,8 +413,7 @@ static void pmac_ide_kauai_selectproc(ide_drive_t *drive); * timing register when selecting that unit. This version is for * ASICs with a single timing register */ -static void -pmac_ide_selectproc(ide_drive_t *drive) +static void pmac_ide_apply_timings(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; pmac_ide_hwif_t *pmif = @@ -434,8 +431,7 @@ pmac_ide_selectproc(ide_drive_t *drive) * timing register when selecting that unit. This version is for * ASICs with a dual timing register (Kauai) */ -static void -pmac_ide_kauai_selectproc(ide_drive_t *drive) +static void pmac_ide_kauai_apply_timings(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; pmac_ide_hwif_t *pmif = @@ -464,9 +460,25 @@ pmac_ide_do_update_timings(ide_drive_t *drive) if (pmif->kind == controller_sh_ata6 || pmif->kind == controller_un_ata6 || pmif->kind == controller_k2_ata6) - pmac_ide_kauai_selectproc(drive); + pmac_ide_kauai_apply_timings(drive); else - pmac_ide_selectproc(drive); + pmac_ide_apply_timings(drive); +} + +static void pmac_dev_select(ide_drive_t *drive) +{ + pmac_ide_apply_timings(drive); + + writeb(drive->select | ATA_DEVICE_OBS, + (void __iomem *)drive->hwif->io_ports.device_addr); +} + +static void pmac_kauai_dev_select(ide_drive_t *drive) +{ + pmac_ide_kauai_apply_timings(drive); + + writeb(drive->select | ATA_DEVICE_OBS, + (void __iomem *)drive->hwif->io_ports.device_addr); } static void pmac_exec_command(ide_hwif_t *hwif, u8 cmd) @@ -476,17 +488,8 @@ static void pmac_exec_command(ide_hwif_t *hwif, u8 cmd) + IDE_TIMING_CONFIG)); } -static void pmac_set_irq(ide_hwif_t *hwif, int on) +static void pmac_write_devctl(ide_hwif_t *hwif, u8 ctl) { - u8 ctl = ATA_DEVCTL_OBS; - - if (on == 4) { /* hack for SRST */ - ctl |= 4; - on &= ~4; - } - - ctl |= on ? 0 : 2; - writeb(ctl, (void __iomem *)hwif->io_ports.ctl_addr); (void)readl((void __iomem *)(hwif->io_ports.data_addr + IDE_TIMING_CONFIG)); @@ -916,10 +919,18 @@ static u8 pmac_ide_cable_detect(ide_hwif_t *hwif) (pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent); struct device_node *np = pmif->node; const char *cable = of_get_property(np, "cable-type", NULL); + struct device_node *root = of_find_node_by_path("/"); + const char *model = of_get_property(root, "model", NULL); /* Get cable type from device-tree. */ - if (cable && !strncmp(cable, "80-", 3)) - return ATA_CBL_PATA80; + if (cable && !strncmp(cable, "80-", 3)) { + /* Some drives fail to detect 80c cable in PowerBook */ + /* These machine use proprietary short IDE cable anyway */ + if (!strncmp(model, "PowerBook", 9)) + return ATA_CBL_PATA40_SHORT; + else + return ATA_CBL_PATA80; + } /* * G5's seem to have incorrect cable type in device-tree. @@ -954,9 +965,9 @@ static const struct ide_tp_ops pmac_tp_ops = { .exec_command = pmac_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, + .write_devctl = pmac_write_devctl, - .set_irq = pmac_set_irq, - + .dev_select = pmac_dev_select, .tf_load = ide_tf_load, .tf_read = ide_tf_read, @@ -964,19 +975,24 @@ static const struct ide_tp_ops pmac_tp_ops = { .output_data = ide_output_data, }; -static const struct ide_port_ops pmac_ide_ata6_port_ops = { - .init_dev = pmac_ide_init_dev, - .set_pio_mode = pmac_ide_set_pio_mode, - .set_dma_mode = pmac_ide_set_dma_mode, - .selectproc = pmac_ide_kauai_selectproc, - .cable_detect = pmac_ide_cable_detect, +static const struct ide_tp_ops pmac_ata6_tp_ops = { + .exec_command = pmac_exec_command, + .read_status = ide_read_status, + .read_altstatus = ide_read_altstatus, + .write_devctl = pmac_write_devctl, + + .dev_select = pmac_kauai_dev_select, + .tf_load = ide_tf_load, + .tf_read = ide_tf_read, + + .input_data = ide_input_data, + .output_data = ide_output_data, }; static const struct ide_port_ops pmac_ide_ata4_port_ops = { .init_dev = pmac_ide_init_dev, .set_pio_mode = pmac_ide_set_pio_mode, .set_dma_mode = pmac_ide_set_dma_mode, - .selectproc = pmac_ide_selectproc, .cable_detect = pmac_ide_cable_detect, }; @@ -984,7 +1000,6 @@ static const struct ide_port_ops pmac_ide_port_ops = { .init_dev = pmac_ide_init_dev, .set_pio_mode = pmac_ide_set_pio_mode, .set_dma_mode = pmac_ide_set_dma_mode, - .selectproc = pmac_ide_selectproc, }; static const struct ide_dma_ops pmac_dma_ops; @@ -1021,15 +1036,18 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw) pmif->broken_dma = pmif->broken_dma_warn = 0; if (of_device_is_compatible(np, "shasta-ata")) { pmif->kind = controller_sh_ata6; - d.port_ops = &pmac_ide_ata6_port_ops; + d.tp_ops = &pmac_ata6_tp_ops; + d.port_ops = &pmac_ide_ata4_port_ops; d.udma_mask = ATA_UDMA6; } else if (of_device_is_compatible(np, "kauai-ata")) { pmif->kind = controller_un_ata6; - d.port_ops = &pmac_ide_ata6_port_ops; + d.tp_ops = &pmac_ata6_tp_ops; + d.port_ops = &pmac_ide_ata4_port_ops; d.udma_mask = ATA_UDMA5; } else if (of_device_is_compatible(np, "K2-UATA")) { pmif->kind = controller_k2_ata6; - d.port_ops = &pmac_ide_ata6_port_ops; + d.tp_ops = &pmac_ata6_tp_ops; + d.port_ops = &pmac_ide_ata4_port_ops; d.udma_mask = ATA_UDMA5; } else if (of_device_is_compatible(np, "keylargo-ata")) { if (strcmp(np->name, "ata-4") == 0) { @@ -1455,7 +1473,7 @@ static int pmac_ide_build_dmatable(ide_drive_t *drive, struct ide_cmd *cmd) "switching to PIO on Ohare chipset\n", drive->name); pmif->broken_dma_warn = 1; } - goto use_pio_instead; + return 0; } while (cur_len) { unsigned int tc = (cur_len < 0xfe00)? cur_len: 0xfe00; @@ -1463,7 +1481,7 @@ static int pmac_ide_build_dmatable(ide_drive_t *drive, struct ide_cmd *cmd) if (count++ >= MAX_DCMDS) { printk(KERN_WARNING "%s: DMA table too small\n", drive->name); - goto use_pio_instead; + return 0; } st_le16(&table->command, wr? OUTPUT_MORE: INPUT_MORE); st_le16(&table->req_count, tc); @@ -1492,9 +1510,6 @@ static int pmac_ide_build_dmatable(ide_drive_t *drive, struct ide_cmd *cmd) printk(KERN_DEBUG "%s: empty DMA table?\n", drive->name); -use_pio_instead: - ide_destroy_dmatable(drive); - return 0; /* revert to PIO for this request */ } @@ -1510,10 +1525,8 @@ static int pmac_ide_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) u8 unit = drive->dn & 1, ata4 = (pmif->kind == controller_kl_ata4); u8 write = !!(cmd->tf_flags & IDE_TFLAG_WRITE); - if (pmac_ide_build_dmatable(drive, cmd) == 0) { - ide_map_sg(drive, cmd); + if (pmac_ide_build_dmatable(drive, cmd) == 0) return 1; - } /* Apple adds 60ns to wrDataSetup on reads */ if (ata4 && (pmif->timings[unit] & TR_66_UDMA_EN)) { @@ -1522,8 +1535,6 @@ static int pmac_ide_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) (void)readl(PMAC_IDE_REG(IDE_TIMING_CONFIG)); } - drive->waiting_for_dma = 1; - return 0; } @@ -1558,12 +1569,9 @@ pmac_ide_dma_end (ide_drive_t *drive) volatile struct dbdma_regs __iomem *dma = pmif->dma_regs; u32 dstat; - drive->waiting_for_dma = 0; dstat = readl(&dma->status); writel(((RUN|WAKE|DEAD) << 16), &dma->control); - ide_destroy_dmatable(drive); - /* verify good dma status. we don't check for ACTIVE beeing 0. We should... * in theory, but with ATAPI decices doing buffer underruns, that would * cause us to disable DMA, which isn't what we want @@ -1650,7 +1658,6 @@ static const struct ide_dma_ops pmac_dma_ops = { .dma_start = pmac_ide_dma_start, .dma_end = pmac_ide_dma_end, .dma_test_irq = pmac_ide_dma_test_irq, - .dma_timeout = ide_dma_timeout, .dma_lost_irq = pmac_ide_dma_lost_irq, }; diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c index 2a43a2f49633..d007e7f66598 100644 --- a/drivers/ide/q40ide.c +++ b/drivers/ide/q40ide.c @@ -99,9 +99,9 @@ static const struct ide_tp_ops q40ide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, - .set_irq = ide_set_irq, - + .dev_select = ide_dev_select, .tf_load = ide_tf_load, .tf_read = ide_tf_read, diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c index 08c4fa35e9b1..c9a134986891 100644 --- a/drivers/ide/qd65xx.c +++ b/drivers/ide/qd65xx.c @@ -90,13 +90,15 @@ static int timings[4]={-1,-1,-1,-1}; /* stores current timing for each timer */ * This routine is invoked to prepare for access to a given drive. */ -static void qd65xx_select(ide_drive_t *drive) +static void qd65xx_dev_select(ide_drive_t *drive) { u8 index = (( (QD_TIMREG(drive)) & 0x80 ) >> 7) | (QD_TIMREG(drive) & 0x02); if (timings[index] != QD_TIMING(drive)) outb(timings[index] = QD_TIMING(drive), QD_TIMREG(drive)); + + outb(drive->select | ATA_DEVICE_OBS, drive->hwif->io_ports.device_addr); } /* @@ -309,20 +311,33 @@ static void __init qd6580_init_dev(ide_drive_t *drive) drive->drive_data = (drive->dn & 1) ? t2 : t1; } +static const struct ide_tp_ops qd65xx_tp_ops = { + .exec_command = ide_exec_command, + .read_status = ide_read_status, + .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, + + .dev_select = qd65xx_dev_select, + .tf_load = ide_tf_load, + .tf_read = ide_tf_read, + + .input_data = ide_input_data, + .output_data = ide_output_data, +}; + static const struct ide_port_ops qd6500_port_ops = { .init_dev = qd6500_init_dev, .set_pio_mode = qd6500_set_pio_mode, - .selectproc = qd65xx_select, }; static const struct ide_port_ops qd6580_port_ops = { .init_dev = qd6580_init_dev, .set_pio_mode = qd6580_set_pio_mode, - .selectproc = qd65xx_select, }; static const struct ide_port_info qd65xx_port_info __initdata = { .name = DRV_NAME, + .tp_ops = &qd65xx_tp_ops, .chipset = ide_qd65xx, .host_flags = IDE_HFLAG_IO_32BIT | IDE_HFLAG_NO_DMA, diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c index 1c3a82914999..d467478d68da 100644 --- a/drivers/ide/sc1200.c +++ b/drivers/ide/sc1200.c @@ -115,8 +115,7 @@ static u8 sc1200_udma_filter(ide_drive_t *drive) if ((mateid[ATA_ID_FIELD_VALID] & 4) && (mateid[ATA_ID_UDMA_MODES] & 7)) goto out; - if ((mateid[ATA_ID_FIELD_VALID] & 2) && - (mateid[ATA_ID_MWDMA_MODES] & 7)) + if (mateid[ATA_ID_MWDMA_MODES] & 7) mask = 0; } out: @@ -183,9 +182,6 @@ static int sc1200_dma_end(ide_drive_t *drive) outb(dma_stat|0x1b, dma_base+2); /* clear the INTR & ERROR bits */ outb(inb(dma_base)&~1, dma_base); /* !! DO THIS HERE !! stop DMA */ - drive->waiting_for_dma = 0; - ide_destroy_dmatable(drive); /* purge DMA mappings */ - return (dma_stat & 7) != 4; /* verify good DMA status */ } @@ -291,7 +287,6 @@ static const struct ide_dma_ops sc1200_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c index 0cc137cfe76d..6d8dbd9c10bc 100644 --- a/drivers/ide/scc_pata.c +++ b/drivers/ide/scc_pata.c @@ -148,17 +148,8 @@ static u8 scc_dma_sff_read_status(ide_hwif_t *hwif) return (u8)in_be32((void *)(hwif->dma_base + 4)); } -static void scc_set_irq(ide_hwif_t *hwif, int on) +static void scc_write_devctl(ide_hwif_t *hwif, u8 ctl) { - u8 ctl = ATA_DEVCTL_OBS; - - if (on == 4) { /* hack for SRST */ - ctl |= 4; - on &= ~4; - } - - ctl |= on ? 0 : 2; - out_be32((void *)hwif->io_ports.ctl_addr, ctl); eieio(); in_be32((void *)(hwif->dma_base + 0x01c)); @@ -321,10 +312,8 @@ static int scc_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) u8 dma_stat; /* fall back to pio! */ - if (ide_build_dmatable(drive, cmd) == 0) { - ide_map_sg(drive, cmd); + if (ide_build_dmatable(drive, cmd) == 0) return 1; - } /* PRD table */ out_be32((void __iomem *)(hwif->dma_base + 8), hwif->dmatable_dma); @@ -337,7 +326,7 @@ static int scc_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) /* clear INTR & ERROR flags */ out_be32((void __iomem *)(hwif->dma_base + 4), dma_stat | 6); - drive->waiting_for_dma = 1; + return 0; } @@ -356,7 +345,6 @@ static int __scc_dma_end(ide_drive_t *drive) ide_hwif_t *hwif = drive->hwif; u8 dma_stat, dma_cmd; - drive->waiting_for_dma = 0; /* get DMA command mode */ dma_cmd = scc_ide_inb(hwif->dma_base); /* stop DMA */ @@ -365,8 +353,6 @@ static int __scc_dma_end(ide_drive_t *drive) dma_stat = scc_dma_sff_read_status(hwif); /* clear the INTR & ERROR bits */ scc_ide_outb(dma_stat | 6, hwif->dma_base + 4); - /* purge DMA mappings */ - ide_destroy_dmatable(drive); /* verify good DMA status */ wmb(); return (dma_stat & 7) != 4 ? (0x10 | dma_stat) : 0; @@ -670,10 +656,6 @@ static void scc_tf_load(ide_drive_t *drive, struct ide_cmd *cmd) if (cmd->ftf_flags & IDE_FTFLAG_FLAGGED) HIHI = 0xFF; - if (cmd->ftf_flags & IDE_FTFLAG_OUT_DATA) - out_be32((void *)io_ports->data_addr, - (tf->hob_data << 8) | tf->data); - if (cmd->tf_flags & IDE_TFLAG_OUT_HOB_FEATURE) scc_ide_outb(tf->hob_feature, io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_OUT_HOB_NSECT) @@ -706,18 +688,11 @@ static void scc_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) struct ide_io_ports *io_ports = &drive->hwif->io_ports; struct ide_taskfile *tf = &cmd->tf; - if (cmd->ftf_flags & IDE_FTFLAG_IN_DATA) { - u16 data = (u16)in_be32((void *)io_ports->data_addr); - - tf->data = data & 0xff; - tf->hob_data = (data >> 8) & 0xff; - } - /* be sure we're looking at the low order bits */ - scc_ide_outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr); + scc_ide_outb(ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_FEATURE) - tf->feature = scc_ide_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_ERROR) + tf->error = scc_ide_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_NSECT) tf->nsect = scc_ide_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_LBAL) @@ -730,18 +705,18 @@ static void scc_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) tf->device = scc_ide_inb(io_ports->device_addr); if (cmd->tf_flags & IDE_TFLAG_LBA48) { - scc_ide_outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr); + scc_ide_outb(ATA_HOB | ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) - tf->hob_feature = scc_ide_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_HOB_ERROR) + tf->hob_error = scc_ide_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_NSECT) - tf->hob_nsect = scc_ide_inb(io_ports->nsect_addr); + tf->hob_nsect = scc_ide_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAL) - tf->hob_lbal = scc_ide_inb(io_ports->lbal_addr); + tf->hob_lbal = scc_ide_inb(io_ports->lbal_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAM) - tf->hob_lbam = scc_ide_inb(io_ports->lbam_addr); + tf->hob_lbam = scc_ide_inb(io_ports->lbam_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAH) - tf->hob_lbah = scc_ide_inb(io_ports->lbah_addr); + tf->hob_lbah = scc_ide_inb(io_ports->lbah_addr); } } @@ -848,9 +823,9 @@ static const struct ide_tp_ops scc_tp_ops = { .exec_command = scc_exec_command, .read_status = scc_read_status, .read_altstatus = scc_read_altstatus, + .write_devctl = scc_write_devctl, - .set_irq = scc_set_irq, - + .dev_select = ide_dev_select, .tf_load = scc_tf_load, .tf_read = scc_tf_read, @@ -872,7 +847,6 @@ static const struct ide_dma_ops scc_dma_ops = { .dma_end = scc_dma_end, .dma_test_irq = scc_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, - .dma_timeout = ide_dma_timeout, .dma_timer_expiry = ide_dma_sff_timer_expiry, .dma_sff_read_status = scc_dma_sff_read_status, }; diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c index b12de8346c73..e5d2a48a84de 100644 --- a/drivers/ide/sgiioc4.c +++ b/drivers/ide/sgiioc4.c @@ -258,9 +258,6 @@ static int sgiioc4_dma_end(ide_drive_t *drive) } } - drive->waiting_for_dma = 0; - ide_destroy_dmatable(drive); - return dma_stat; } @@ -280,10 +277,12 @@ static void sgiioc4_dma_host_set(ide_drive_t *drive, int on) sgiioc4_clearirq(drive); } -static void -sgiioc4_resetproc(ide_drive_t * drive) +static void sgiioc4_resetproc(ide_drive_t *drive) { + struct ide_cmd *cmd = &drive->hwif->cmd; + sgiioc4_dma_end(drive); + ide_dma_unmap_sg(drive, cmd); sgiioc4_clearirq(drive); } @@ -412,7 +411,6 @@ sgiioc4_configure_for_dma(int dma_direction, ide_drive_t * drive) writel(ending_dma_addr, (void __iomem *)(dma_base + IOC4_DMA_END_ADDR * 4)); writel(dma_direction, (void __iomem *)ioc4_dma_addr); - drive->waiting_for_dma = 1; } /* IOC4 Scatter Gather list Format */ @@ -442,7 +440,7 @@ static int sgiioc4_build_dmatable(ide_drive_t *drive, struct ide_cmd *cmd) printk(KERN_WARNING "%s: DMA table too small\n", drive->name); - goto use_pio_instead; + return 0; } else { u32 bcount = 0x10000 - (cur_addr & 0xffff); @@ -477,9 +475,6 @@ static int sgiioc4_build_dmatable(ide_drive_t *drive, struct ide_cmd *cmd) return count; } -use_pio_instead: - ide_destroy_dmatable(drive); - return 0; /* revert to PIO for this request */ } @@ -488,11 +483,9 @@ static int sgiioc4_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) int ddir; u8 write = !!(cmd->tf_flags & IDE_TFLAG_WRITE); - if (sgiioc4_build_dmatable(drive, cmd) == 0) { + if (sgiioc4_build_dmatable(drive, cmd) == 0) /* try PIO instead of DMA */ - ide_map_sg(drive, cmd); return 1; - } if (write) /* Writes TO the IOC4 FROM Main Memory */ @@ -510,9 +503,9 @@ static const struct ide_tp_ops sgiioc4_tp_ops = { .exec_command = ide_exec_command, .read_status = sgiioc4_read_status, .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, - .set_irq = ide_set_irq, - + .dev_select = ide_dev_select, .tf_load = ide_tf_load, .tf_read = ide_tf_read, @@ -533,7 +526,6 @@ static const struct ide_dma_ops sgiioc4_dma_ops = { .dma_end = sgiioc4_dma_end, .dma_test_irq = sgiioc4_dma_test_irq, .dma_lost_irq = sgiioc4_dma_lost_irq, - .dma_timeout = ide_dma_timeout, }; static const struct ide_port_info sgiioc4_port_info __devinitconst = { diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c index 075cb1243b2a..e4973cd1fba9 100644 --- a/drivers/ide/siimage.c +++ b/drivers/ide/siimage.c @@ -715,7 +715,6 @@ static const struct ide_dma_ops sil_dma_ops = { .dma_end = ide_dma_end, .dma_test_irq = siimage_dma_test_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_lost_irq = ide_dma_lost_irq, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c index d25137b04e7a..b0a460625335 100644 --- a/drivers/ide/sl82c105.c +++ b/drivers/ide/sl82c105.c @@ -61,7 +61,8 @@ static unsigned int get_pio_timings(ide_drive_t *drive, u8 pio) if (cmd_off == 0) cmd_off = 1; - if (pio > 2 || ata_id_has_iordy(drive->id)) + if ((pio > 2 || ata_id_has_iordy(drive->id)) && + !(pio > 4 && ata_id_is_cfa(drive->id))) iordy = 0x40; return (cmd_on - 1) << 8 | (cmd_off - 1) | iordy; @@ -189,14 +190,13 @@ static void sl82c105_dma_start(ide_drive_t *drive) ide_dma_start(drive); } -static void sl82c105_dma_timeout(ide_drive_t *drive) +static void sl82c105_dma_clear(ide_drive_t *drive) { struct pci_dev *dev = to_pci_dev(drive->hwif->dev); - DBG(("sl82c105_dma_timeout(drive:%s)\n", drive->name)); + DBG(("sl82c105_dma_clear(drive:%s)\n", drive->name)); sl82c105_reset_host(dev); - ide_dma_timeout(drive); } static int sl82c105_dma_end(ide_drive_t *drive) @@ -298,7 +298,7 @@ static const struct ide_dma_ops sl82c105_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = sl82c105_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = sl82c105_dma_timeout, + .dma_clear = sl82c105_dma_clear, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c index 427d4b3c2c63..b4cf42dc8a6f 100644 --- a/drivers/ide/tc86c001.c +++ b/drivers/ide/tc86c001.c @@ -187,7 +187,6 @@ static const struct ide_dma_ops tc86c001_dma_ops = { .dma_test_irq = ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/trm290.c b/drivers/ide/trm290.c index ed1496845a93..4b42ca091534 100644 --- a/drivers/ide/trm290.c +++ b/drivers/ide/trm290.c @@ -171,54 +171,51 @@ static void trm290_prepare_drive (ide_drive_t *drive, unsigned int use_dma) local_irq_restore(flags); } -static void trm290_selectproc (ide_drive_t *drive) +static void trm290_dev_select(ide_drive_t *drive) { trm290_prepare_drive(drive, !!(drive->dev_flags & IDE_DFLAG_USING_DMA)); + + outb(drive->select | ATA_DEVICE_OBS, drive->hwif->io_ports.device_addr); } -static int trm290_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) +static int trm290_dma_check(ide_drive_t *drive, struct ide_cmd *cmd) { - ide_hwif_t *hwif = drive->hwif; - unsigned int count, rw; - if (cmd->tf_flags & IDE_TFLAG_WRITE) { #ifdef TRM290_NO_DMA_WRITES /* always use PIO for writes */ - trm290_prepare_drive(drive, 0); /* select PIO xfer */ return 1; #endif - rw = 1; - } else - rw = 2; + } + return 0; +} + +static int trm290_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) +{ + ide_hwif_t *hwif = drive->hwif; + unsigned int count, rw = (cmd->tf_flags & IDE_TFLAG_WRITE) ? 1 : 2; count = ide_build_dmatable(drive, cmd); - if (count == 0) { - ide_map_sg(drive, cmd); + if (count == 0) /* try PIO instead of DMA */ - trm290_prepare_drive(drive, 0); /* select PIO xfer */ return 1; - } - /* select DMA xfer */ - trm290_prepare_drive(drive, 1); + outl(hwif->dmatable_dma | rw, hwif->dma_base); - drive->waiting_for_dma = 1; /* start DMA */ outw(count * 2 - 1, hwif->dma_base + 2); + return 0; } static void trm290_dma_start(ide_drive_t *drive) { + trm290_prepare_drive(drive, 1); } static int trm290_dma_end(ide_drive_t *drive) { - u16 status; + u16 status = inw(drive->hwif->dma_base + 2); - drive->waiting_for_dma = 0; - /* purge DMA mappings */ - ide_destroy_dmatable(drive); - status = inw(drive->hwif->dma_base + 2); + trm290_prepare_drive(drive, 0); return status != 0x00ff; } @@ -303,8 +300,18 @@ static void __devinit init_hwif_trm290(ide_hwif_t *hwif) #endif } -static const struct ide_port_ops trm290_port_ops = { - .selectproc = trm290_selectproc, +static const struct ide_tp_ops trm290_tp_ops = { + .exec_command = ide_exec_command, + .read_status = ide_read_status, + .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, + + .dev_select = trm290_dev_select, + .tf_load = ide_tf_load, + .tf_read = ide_tf_read, + + .input_data = ide_input_data, + .output_data = ide_output_data, }; static struct ide_dma_ops trm290_dma_ops = { @@ -314,13 +321,13 @@ static struct ide_dma_ops trm290_dma_ops = { .dma_end = trm290_dma_end, .dma_test_irq = trm290_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, - .dma_timeout = ide_dma_timeout, + .dma_check = trm290_dma_check, }; static const struct ide_port_info trm290_chipset __devinitdata = { .name = DRV_NAME, .init_hwif = init_hwif_trm290, - .port_ops = &trm290_port_ops, + .tp_ops = &trm290_tp_ops, .dma_ops = &trm290_dma_ops, .host_flags = IDE_HFLAG_TRM290 | IDE_HFLAG_NO_ATAPI_DMA | diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c index 657a61890b1c..4cb79c4c2604 100644 --- a/drivers/ide/tx4938ide.c +++ b/drivers/ide/tx4938ide.c @@ -92,13 +92,6 @@ static void tx4938ide_tf_load(ide_drive_t *drive, struct ide_cmd *cmd) if (cmd->ftf_flags & IDE_FTFLAG_FLAGGED) HIHI = 0xFF; - if (cmd->ftf_flags & IDE_FTFLAG_OUT_DATA) { - u16 data = (tf->hob_data << 8) | tf->data; - - /* no endian swap */ - __raw_writew(data, (void __iomem *)io_ports->data_addr); - } - if (cmd->tf_flags & IDE_TFLAG_OUT_HOB_FEATURE) tx4938ide_outb(tf->hob_feature, io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_OUT_HOB_NSECT) @@ -132,20 +125,11 @@ static void tx4938ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) struct ide_io_ports *io_ports = &hwif->io_ports; struct ide_taskfile *tf = &cmd->tf; - if (cmd->ftf_flags & IDE_FTFLAG_IN_DATA) { - u16 data; - - /* no endian swap */ - data = __raw_readw((void __iomem *)io_ports->data_addr); - tf->data = data & 0xff; - tf->hob_data = (data >> 8) & 0xff; - } - /* be sure we're looking at the low order bits */ - tx4938ide_outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr); + tx4938ide_outb(ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_FEATURE) - tf->feature = tx4938ide_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_ERROR) + tf->error = tx4938ide_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_NSECT) tf->nsect = tx4938ide_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_LBAL) @@ -158,19 +142,18 @@ static void tx4938ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) tf->device = tx4938ide_inb(io_ports->device_addr); if (cmd->tf_flags & IDE_TFLAG_LBA48) { - tx4938ide_outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr); + tx4938ide_outb(ATA_HOB | ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) - tf->hob_feature = - tx4938ide_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_HOB_ERROR) + tf->hob_error = tx4938ide_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_NSECT) - tf->hob_nsect = tx4938ide_inb(io_ports->nsect_addr); + tf->hob_nsect = tx4938ide_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAL) - tf->hob_lbal = tx4938ide_inb(io_ports->lbal_addr); + tf->hob_lbal = tx4938ide_inb(io_ports->lbal_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAM) - tf->hob_lbam = tx4938ide_inb(io_ports->lbam_addr); + tf->hob_lbam = tx4938ide_inb(io_ports->lbam_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAH) - tf->hob_lbah = tx4938ide_inb(io_ports->lbah_addr); + tf->hob_lbah = tx4938ide_inb(io_ports->lbah_addr); } } @@ -204,9 +187,9 @@ static const struct ide_tp_ops tx4938ide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, - .set_irq = ide_set_irq, - + .dev_select = ide_dev_select, .tf_load = tx4938ide_tf_load, .tf_read = tx4938ide_tf_read, diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index e0e0a803dde3..0040a9a3e26e 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -279,8 +279,6 @@ use_pio_instead: printk(KERN_ERR "%s: %s\n", drive->name, count ? "DMA table too small" : "empty DMA table?"); - ide_destroy_dmatable(drive); - return 0; /* revert to PIO for this request */ } #else @@ -294,10 +292,8 @@ static int tx4939ide_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) u8 rw = (cmd->tf_flags & IDE_TFLAG_WRITE) ? 0 : ATA_DMA_WR; /* fall back to PIO! */ - if (tx4939ide_build_dmatable(drive, cmd) == 0) { - ide_map_sg(drive, cmd); + if (tx4939ide_build_dmatable(drive, cmd) == 0) return 1; - } /* PRD table */ tx4939ide_writel(hwif->dmatable_dma, base, TX4939IDE_PRD_Ptr); @@ -308,8 +304,6 @@ static int tx4939ide_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd) /* clear INTR & ERROR flags */ tx4939ide_clear_dma_status(base); - drive->waiting_for_dma = 1; - tx4939ide_writew(SECTOR_SIZE / 2, base, drive->dn ? TX4939IDE_Xfer_Cnt_2 : TX4939IDE_Xfer_Cnt_1); @@ -325,8 +319,6 @@ static int tx4939ide_dma_end(ide_drive_t *drive) void __iomem *base = TX4939IDE_BASE(hwif); u16 ctl = tx4939ide_readw(base, TX4939IDE_Int_Ctl); - drive->waiting_for_dma = 0; - /* get DMA command mode */ dma_cmd = tx4939ide_readb(base, TX4939IDE_DMA_Cmd); /* stop DMA */ @@ -335,11 +327,9 @@ static int tx4939ide_dma_end(ide_drive_t *drive) /* read and clear the INTR & ERROR bits */ dma_stat = tx4939ide_clear_dma_status(base); - /* purge DMA mappings */ - ide_destroy_dmatable(drive); - /* verify good DMA status */ wmb(); + /* verify good DMA status */ if ((dma_stat & (ATA_DMA_INTR | ATA_DMA_ERR | ATA_DMA_ACTIVE)) == 0 && (ctl & (TX4939IDE_INT_XFEREND | TX4939IDE_INT_HOST)) == (TX4939IDE_INT_XFEREND | TX4939IDE_INT_HOST)) @@ -439,7 +429,7 @@ static void tx4939ide_tf_load_fixup(ide_drive_t *drive) * Fix ATA100 CORE System Control Register. (The write to the * Device/Head register may write wrong data to the System * Control Register) - * While Sys_Ctl is written here, selectproc is not needed. + * While Sys_Ctl is written here, dev_select() is not needed. */ tx4939ide_writew(sysctl, base, TX4939IDE_Sys_Ctl); } @@ -467,13 +457,6 @@ static void tx4939ide_tf_load(ide_drive_t *drive, struct ide_cmd *cmd) if (cmd->ftf_flags & IDE_FTFLAG_FLAGGED) HIHI = 0xFF; - if (cmd->ftf_flags & IDE_FTFLAG_OUT_DATA) { - u16 data = (tf->hob_data << 8) | tf->data; - - /* no endian swap */ - __raw_writew(data, (void __iomem *)io_ports->data_addr); - } - if (cmd->tf_flags & IDE_TFLAG_OUT_HOB_FEATURE) tx4939ide_outb(tf->hob_feature, io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_OUT_HOB_NSECT) @@ -509,20 +492,11 @@ static void tx4939ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) struct ide_io_ports *io_ports = &hwif->io_ports; struct ide_taskfile *tf = &cmd->tf; - if (cmd->ftf_flags & IDE_FTFLAG_IN_DATA) { - u16 data; - - /* no endian swap */ - data = __raw_readw((void __iomem *)io_ports->data_addr); - tf->data = data & 0xff; - tf->hob_data = (data >> 8) & 0xff; - } - /* be sure we're looking at the low order bits */ - tx4939ide_outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr); + tx4939ide_outb(ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_FEATURE) - tf->feature = tx4939ide_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_ERROR) + tf->error = tx4939ide_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_NSECT) tf->nsect = tx4939ide_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_LBAL) @@ -535,19 +509,18 @@ static void tx4939ide_tf_read(ide_drive_t *drive, struct ide_cmd *cmd) tf->device = tx4939ide_inb(io_ports->device_addr); if (cmd->tf_flags & IDE_TFLAG_LBA48) { - tx4939ide_outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr); + tx4939ide_outb(ATA_HOB | ATA_DEVCTL_OBS, io_ports->ctl_addr); - if (cmd->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) - tf->hob_feature = - tx4939ide_inb(io_ports->feature_addr); + if (cmd->tf_flags & IDE_TFLAG_IN_HOB_ERROR) + tf->hob_error = tx4939ide_inb(io_ports->feature_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_NSECT) - tf->hob_nsect = tx4939ide_inb(io_ports->nsect_addr); + tf->hob_nsect = tx4939ide_inb(io_ports->nsect_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAL) - tf->hob_lbal = tx4939ide_inb(io_ports->lbal_addr); + tf->hob_lbal = tx4939ide_inb(io_ports->lbal_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAM) - tf->hob_lbam = tx4939ide_inb(io_ports->lbam_addr); + tf->hob_lbam = tx4939ide_inb(io_ports->lbam_addr); if (cmd->tf_flags & IDE_TFLAG_IN_HOB_LBAH) - tf->hob_lbah = tx4939ide_inb(io_ports->lbah_addr); + tf->hob_lbah = tx4939ide_inb(io_ports->lbah_addr); } } @@ -581,9 +554,9 @@ static const struct ide_tp_ops tx4939ide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, - .set_irq = ide_set_irq, - + .dev_select = ide_dev_select, .tf_load = tx4939ide_tf_load, .tf_read = tx4939ide_tf_read, @@ -605,9 +578,9 @@ static const struct ide_tp_ops tx4939ide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, + .write_devctl = ide_write_devctl, - .set_irq = ide_set_irq, - + .dev_select = ide_dev_select, .tf_load = tx4939ide_tf_load, .tf_read = ide_tf_read, @@ -632,7 +605,6 @@ static const struct ide_dma_ops tx4939ide_dma_ops = { .dma_test_irq = tx4939ide_dma_test_irq, .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_timeout = ide_dma_timeout, .dma_sff_read_status = tx4939ide_dma_sff_read_status, }; diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c index 3e468d2cf730..2d8352419c0d 100644 --- a/drivers/isdn/capi/capi.c +++ b/drivers/isdn/capi/capi.c @@ -1331,12 +1331,6 @@ static void capinc_tty_send_xchar(struct tty_struct *tty, char ch) #endif } -static int capinc_tty_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - return 0; -} - static struct tty_driver *capinc_tty_driver; static const struct tty_operations capinc_ops = { @@ -1358,7 +1352,6 @@ static const struct tty_operations capinc_ops = { .flush_buffer = capinc_tty_flush_buffer, .set_ldisc = capinc_tty_set_ldisc, .send_xchar = capinc_tty_send_xchar, - .read_proc = capinc_tty_read_proc, }; static int capinc_tty_init(void) diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 1c484084ed4f..5f3bff434621 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -223,6 +223,16 @@ config DELL_LAPTOP This driver adds support for rfkill and backlight control to Dell laptops. +config ISL29003 + tristate "Intersil ISL29003 ambient light sensor" + depends on I2C && SYSFS + help + If you say yes here you get support for the Intersil ISL29003 + ambient light sensor. + + This driver can also be built as a module. If so, the module + will be called isl29003. + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index bc1199830554..7871f05dcb9b 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -18,5 +18,6 @@ obj-$(CONFIG_KGDB_TESTS) += kgdbts.o obj-$(CONFIG_SGI_XP) += sgi-xp/ obj-$(CONFIG_SGI_GRU) += sgi-gru/ obj-$(CONFIG_HP_ILO) += hpilo.o +obj-$(CONFIG_ISL29003) += isl29003.o obj-$(CONFIG_C2PORT) += c2port/ obj-y += eeprom/ diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c index cf991850f01b..880ccf39e23b 100644 --- a/drivers/misc/hpilo.c +++ b/drivers/misc/hpilo.c @@ -209,7 +209,7 @@ static void ilo_ccb_close(struct pci_dev *pdev, struct ccb_data *data) /* give iLO some time to process stop request */ for (retries = MAX_WAIT; retries > 0; retries--) { doorbell_set(driver_ccb); - udelay(1); + udelay(WAIT_TIME); if (!(ioread32(&device_ccb->send_ctrl) & (1 << CTRL_BITPOS_A)) && !(ioread32(&device_ccb->recv_ctrl) & (1 << CTRL_BITPOS_A))) @@ -312,7 +312,7 @@ static int ilo_ccb_open(struct ilo_hwinfo *hw, struct ccb_data *data, int slot) for (i = MAX_WAIT; i > 0; i--) { if (ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, NULL, NULL)) break; - udelay(1); + udelay(WAIT_TIME); } if (i) { @@ -759,7 +759,7 @@ static void __exit ilo_exit(void) class_destroy(ilo_class); } -MODULE_VERSION("1.0"); +MODULE_VERSION("1.1"); MODULE_ALIAS(ILO_NAME); MODULE_DESCRIPTION(ILO_NAME); MODULE_AUTHOR("David Altobelli <david.altobelli@hp.com>"); diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h index b64a20ef07e3..03a14c82aad9 100644 --- a/drivers/misc/hpilo.h +++ b/drivers/misc/hpilo.h @@ -19,8 +19,12 @@ #define MAX_ILO_DEV 1 /* max number of files */ #define MAX_OPEN (MAX_CCB * MAX_ILO_DEV) +/* total wait time in usec */ +#define MAX_WAIT_TIME 10000 +/* per spin wait time in usec */ +#define WAIT_TIME 10 /* spin counter for open/close delay */ -#define MAX_WAIT 10000 +#define MAX_WAIT (MAX_WAIT_TIME / WAIT_TIME) /* * Per device, used to track global memory allocations. diff --git a/drivers/misc/isl29003.c b/drivers/misc/isl29003.c new file mode 100644 index 000000000000..2e2a5923d4c2 --- /dev/null +++ b/drivers/misc/isl29003.c @@ -0,0 +1,470 @@ +/* + * isl29003.c - Linux kernel module for + * Intersil ISL29003 ambient light sensor + * + * See file:Documentation/misc-devices/isl29003 + * + * Copyright (c) 2009 Daniel Mack <daniel@caiaq.de> + * + * Based on code written by + * Rodolfo Giometti <giometti@linux.it> + * Eurotech S.p.A. <info@eurotech.it> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/mutex.h> +#include <linux/delay.h> + +#define ISL29003_DRV_NAME "isl29003" +#define DRIVER_VERSION "1.0" + +#define ISL29003_REG_COMMAND 0x00 +#define ISL29003_ADC_ENABLED (1 << 7) +#define ISL29003_ADC_PD (1 << 6) +#define ISL29003_TIMING_INT (1 << 5) +#define ISL29003_MODE_SHIFT (2) +#define ISL29003_MODE_MASK (0x3 << ISL29003_MODE_SHIFT) +#define ISL29003_RES_SHIFT (0) +#define ISL29003_RES_MASK (0x3 << ISL29003_RES_SHIFT) + +#define ISL29003_REG_CONTROL 0x01 +#define ISL29003_INT_FLG (1 << 5) +#define ISL29003_RANGE_SHIFT (2) +#define ISL29003_RANGE_MASK (0x3 << ISL29003_RANGE_SHIFT) +#define ISL29003_INT_PERSISTS_SHIFT (0) +#define ISL29003_INT_PERSISTS_MASK (0xf << ISL29003_INT_PERSISTS_SHIFT) + +#define ISL29003_REG_IRQ_THRESH_HI 0x02 +#define ISL29003_REG_IRQ_THRESH_LO 0x03 +#define ISL29003_REG_LSB_SENSOR 0x04 +#define ISL29003_REG_MSB_SENSOR 0x05 +#define ISL29003_REG_LSB_TIMER 0x06 +#define ISL29003_REG_MSB_TIMER 0x07 + +#define ISL29003_NUM_CACHABLE_REGS 4 + +struct isl29003_data { + struct i2c_client *client; + struct mutex lock; + u8 reg_cache[ISL29003_NUM_CACHABLE_REGS]; +}; + +static int gain_range[] = { + 1000, 4000, 16000, 64000 +}; + +/* + * register access helpers + */ + +static int __isl29003_read_reg(struct i2c_client *client, + u32 reg, u8 mask, u8 shift) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + return (data->reg_cache[reg] & mask) >> shift; +} + +static int __isl29003_write_reg(struct i2c_client *client, + u32 reg, u8 mask, u8 shift, u8 val) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + int ret = 0; + u8 tmp; + + if (reg >= ISL29003_NUM_CACHABLE_REGS) + return -EINVAL; + + mutex_lock(&data->lock); + + tmp = data->reg_cache[reg]; + tmp &= ~mask; + tmp |= val << shift; + + ret = i2c_smbus_write_byte_data(client, reg, tmp); + if (!ret) + data->reg_cache[reg] = tmp; + + mutex_unlock(&data->lock); + return ret; +} + +/* + * internally used functions + */ + +/* range */ +static int isl29003_get_range(struct i2c_client *client) +{ + return __isl29003_read_reg(client, ISL29003_REG_CONTROL, + ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT); +} + +static int isl29003_set_range(struct i2c_client *client, int range) +{ + return __isl29003_write_reg(client, ISL29003_REG_CONTROL, + ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT, range); +} + +/* resolution */ +static int isl29003_get_resolution(struct i2c_client *client) +{ + return __isl29003_read_reg(client, ISL29003_REG_COMMAND, + ISL29003_RES_MASK, ISL29003_RES_SHIFT); +} + +static int isl29003_set_resolution(struct i2c_client *client, int res) +{ + return __isl29003_write_reg(client, ISL29003_REG_COMMAND, + ISL29003_RES_MASK, ISL29003_RES_SHIFT, res); +} + +/* mode */ +static int isl29003_get_mode(struct i2c_client *client) +{ + return __isl29003_read_reg(client, ISL29003_REG_COMMAND, + ISL29003_RES_MASK, ISL29003_RES_SHIFT); +} + +static int isl29003_set_mode(struct i2c_client *client, int mode) +{ + return __isl29003_write_reg(client, ISL29003_REG_COMMAND, + ISL29003_RES_MASK, ISL29003_RES_SHIFT, mode); +} + +/* power_state */ +static int isl29003_set_power_state(struct i2c_client *client, int state) +{ + return __isl29003_write_reg(client, ISL29003_REG_COMMAND, + ISL29003_ADC_ENABLED | ISL29003_ADC_PD, 0, + state ? ISL29003_ADC_ENABLED : ISL29003_ADC_PD); +} + +static int isl29003_get_power_state(struct i2c_client *client) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + u8 cmdreg = data->reg_cache[ISL29003_REG_COMMAND]; + return ~cmdreg & ISL29003_ADC_PD; +} + +static int isl29003_get_adc_value(struct i2c_client *client) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + int lsb, msb, range, bitdepth; + + mutex_lock(&data->lock); + lsb = i2c_smbus_read_byte_data(client, ISL29003_REG_LSB_SENSOR); + + if (lsb < 0) { + mutex_unlock(&data->lock); + return lsb; + } + + msb = i2c_smbus_read_byte_data(client, ISL29003_REG_MSB_SENSOR); + mutex_unlock(&data->lock); + + if (msb < 0) + return msb; + + range = isl29003_get_range(client); + bitdepth = (4 - isl29003_get_resolution(client)) * 4; + return (((msb << 8) | lsb) * gain_range[range]) >> bitdepth; +} + +/* + * sysfs layer + */ + +/* range */ +static ssize_t isl29003_show_range(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + return sprintf(buf, "%i\n", isl29003_get_range(client)); +} + +static ssize_t isl29003_store_range(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + if ((strict_strtoul(buf, 10, &val) < 0) || (val > 3)) + return -EINVAL; + + ret = isl29003_set_range(client, val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(range, S_IWUSR | S_IRUGO, + isl29003_show_range, isl29003_store_range); + + +/* resolution */ +static ssize_t isl29003_show_resolution(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + return sprintf(buf, "%d\n", isl29003_get_resolution(client)); +} + +static ssize_t isl29003_store_resolution(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + if ((strict_strtoul(buf, 10, &val) < 0) || (val > 3)) + return -EINVAL; + + ret = isl29003_set_resolution(client, val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(resolution, S_IWUSR | S_IRUGO, + isl29003_show_resolution, isl29003_store_resolution); + +/* mode */ +static ssize_t isl29003_show_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + return sprintf(buf, "%d\n", isl29003_get_mode(client)); +} + +static ssize_t isl29003_store_mode(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + if ((strict_strtoul(buf, 10, &val) < 0) || (val > 2)) + return -EINVAL; + + ret = isl29003_set_mode(client, val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, + isl29003_show_mode, isl29003_store_mode); + + +/* power state */ +static ssize_t isl29003_show_power_state(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + return sprintf(buf, "%d\n", isl29003_get_power_state(client)); +} + +static ssize_t isl29003_store_power_state(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + if ((strict_strtoul(buf, 10, &val) < 0) || (val > 1)) + return -EINVAL; + + ret = isl29003_set_power_state(client, val); + return ret ? ret : count; +} + +static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO, + isl29003_show_power_state, isl29003_store_power_state); + + +/* lux */ +static ssize_t isl29003_show_lux(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + + /* No LUX data if not operational */ + if (!isl29003_get_power_state(client)) + return -EBUSY; + + return sprintf(buf, "%d\n", isl29003_get_adc_value(client)); +} + +static DEVICE_ATTR(lux, S_IRUGO, isl29003_show_lux, NULL); + +static struct attribute *isl29003_attributes[] = { + &dev_attr_range.attr, + &dev_attr_resolution.attr, + &dev_attr_mode.attr, + &dev_attr_power_state.attr, + &dev_attr_lux.attr, + NULL +}; + +static const struct attribute_group isl29003_attr_group = { + .attrs = isl29003_attributes, +}; + +static int isl29003_init_client(struct i2c_client *client) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + int i; + + /* read all the registers once to fill the cache. + * if one of the reads fails, we consider the init failed */ + for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++) { + int v = i2c_smbus_read_byte_data(client, i); + if (v < 0) + return -ENODEV; + + data->reg_cache[i] = v; + } + + /* set defaults */ + isl29003_set_range(client, 0); + isl29003_set_resolution(client, 0); + isl29003_set_mode(client, 0); + isl29003_set_power_state(client, 0); + + return 0; +} + +/* + * I2C layer + */ + +static int __devinit isl29003_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct isl29003_data *data; + int err = 0; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) + return -EIO; + + data = kzalloc(sizeof(struct isl29003_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->client = client; + i2c_set_clientdata(client, data); + mutex_init(&data->lock); + + /* initialize the ISL29003 chip */ + err = isl29003_init_client(client); + if (err) + goto exit_kfree; + + /* register sysfs hooks */ + err = sysfs_create_group(&client->dev.kobj, &isl29003_attr_group); + if (err) + goto exit_kfree; + + dev_info(&client->dev, "driver version %s enabled\n", DRIVER_VERSION); + return 0; + +exit_kfree: + kfree(data); + return err; +} + +static int __devexit isl29003_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &isl29003_attr_group); + isl29003_set_power_state(client, 0); + kfree(i2c_get_clientdata(client)); + return 0; +} + +#ifdef CONFIG_PM +static int isl29003_suspend(struct i2c_client *client, pm_message_t mesg) +{ + return isl29003_set_power_state(client, 0); +} + +static int isl29003_resume(struct i2c_client *client) +{ + int i; + struct isl29003_data *data = i2c_get_clientdata(client); + + /* restore registers from cache */ + for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++) + if (!i2c_smbus_write_byte_data(client, i, data->reg_cache[i])) + return -EIO; + + return 0; +} + +#else +#define isl29003_suspend NULL +#define isl29003_resume NULL +#endif /* CONFIG_PM */ + +static const struct i2c_device_id isl29003_id[] = { + { "isl29003", 0 }, + {} +}; +MODULE_DEVICE_TABLE(i2c, isl29003_id); + +static struct i2c_driver isl29003_driver = { + .driver = { + .name = ISL29003_DRV_NAME, + .owner = THIS_MODULE, + }, + .suspend = isl29003_suspend, + .resume = isl29003_resume, + .probe = isl29003_probe, + .remove = __devexit_p(isl29003_remove), + .id_table = isl29003_id, +}; + +static int __init isl29003_init(void) +{ + return i2c_add_driver(&isl29003_driver); +} + +static void __exit isl29003_exit(void) +{ + i2c_del_driver(&isl29003_driver); +} + +MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>"); +MODULE_DESCRIPTION("ISL29003 ambient light sensor driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRIVER_VERSION); + +module_init(isl29003_init); +module_exit(isl29003_exit); + diff --git a/drivers/mmc/card/sdio_uart.c b/drivers/mmc/card/sdio_uart.c index 78ad48718ab0..36a8d53ad2a2 100644 --- a/drivers/mmc/card/sdio_uart.c +++ b/drivers/mmc/card/sdio_uart.c @@ -30,6 +30,7 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/mutex.h> +#include <linux/seq_file.h> #include <linux/serial_reg.h> #include <linux/circ_buf.h> #include <linux/gfp.h> @@ -933,67 +934,64 @@ static int sdio_uart_tiocmset(struct tty_struct *tty, struct file *file, return result; } -static int sdio_uart_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int sdio_uart_proc_show(struct seq_file *m, void *v) { - int i, len = 0; - off_t begin = 0; + int i; - len += sprintf(page, "serinfo:1.0 driver%s%s revision:%s\n", + seq_printf(m, "serinfo:1.0 driver%s%s revision:%s\n", "", "", ""); - for (i = 0; i < UART_NR && len < PAGE_SIZE - 96; i++) { + for (i = 0; i < UART_NR; i++) { struct sdio_uart_port *port = sdio_uart_port_get(i); if (port) { - len += sprintf(page+len, "%d: uart:SDIO", i); + seq_printf(m, "%d: uart:SDIO", i); if(capable(CAP_SYS_ADMIN)) { - len += sprintf(page + len, " tx:%d rx:%d", + seq_printf(m, " tx:%d rx:%d", port->icount.tx, port->icount.rx); if (port->icount.frame) - len += sprintf(page + len, " fe:%d", + seq_printf(m, " fe:%d", port->icount.frame); if (port->icount.parity) - len += sprintf(page + len, " pe:%d", + seq_printf(m, " pe:%d", port->icount.parity); if (port->icount.brk) - len += sprintf(page + len, " brk:%d", + seq_printf(m, " brk:%d", port->icount.brk); if (port->icount.overrun) - len += sprintf(page + len, " oe:%d", + seq_printf(m, " oe:%d", port->icount.overrun); if (port->icount.cts) - len += sprintf(page + len, " cts:%d", + seq_printf(m, " cts:%d", port->icount.cts); if (port->icount.dsr) - len += sprintf(page + len, " dsr:%d", + seq_printf(m, " dsr:%d", port->icount.dsr); if (port->icount.rng) - len += sprintf(page + len, " rng:%d", + seq_printf(m, " rng:%d", port->icount.rng); if (port->icount.dcd) - len += sprintf(page + len, " dcd:%d", + seq_printf(m, " dcd:%d", port->icount.dcd); } - strcat(page, "\n"); - len++; sdio_uart_port_put(port); - } - - if (len + begin > off + count) - goto done; - if (len + begin < off) { - begin += len; - len = 0; + seq_putc(m, '\n'); } } - *eof = 1; + return 0; +} -done: - if (off >= len + begin) - return 0; - *start = page + (off - begin); - return (count < begin + len - off) ? count : (begin + len - off); +static int sdio_uart_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, sdio_uart_proc_show, NULL); } +static const struct file_operations sdio_uart_proc_fops = { + .owner = THIS_MODULE, + .open = sdio_uart_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static const struct tty_operations sdio_uart_ops = { .open = sdio_uart_open, .close = sdio_uart_close, @@ -1007,7 +1005,7 @@ static const struct tty_operations sdio_uart_ops = { .break_ctl = sdio_uart_break_ctl, .tiocmget = sdio_uart_tiocmget, .tiocmset = sdio_uart_tiocmset, - .read_proc = sdio_uart_read_proc, + .proc_fops = &sdio_uart_proc_fops, }; static struct tty_driver *sdio_uart_tty_driver; diff --git a/drivers/of/base.c b/drivers/of/base.c index cd17092b82bd..41c5dfd85358 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -446,6 +446,7 @@ struct of_modalias_table { }; static struct of_modalias_table of_modalias_table[] = { { "fsl,mcu-mpc8349emitx", "mcu-mpc8349emitx" }, + { "mmc-spi-slot", "mmc_spi" }, }; /** diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 2a4501dd2515..fdc864f9cf23 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -59,3 +59,13 @@ config HT_IRQ This allows native hypertransport devices to use interrupts. If unsure say Y. + +config PCI_IOV + bool "PCI IOV support" + depends on PCI + help + I/O Virtualization is a PCI feature supported by some devices + which allows them to create virtual devices which share their + physical resources. + + If unsure, say N. diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 3d07ce24f6a8..ba6af162fd39 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -29,6 +29,8 @@ obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o +obj-$(CONFIG_PCI_IOV) += iov.o + # # Some architectures use the generic PCI setup functions # diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 52b54f053be0..68f91a252595 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -133,7 +133,7 @@ int pci_bus_add_child(struct pci_bus *bus) * * Call hotplug for each new devices. */ -void pci_bus_add_devices(struct pci_bus *bus) +void pci_bus_add_devices(const struct pci_bus *bus) { struct pci_dev *dev; struct pci_bus *child; @@ -184,8 +184,10 @@ void pci_enable_bridges(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { if (dev->subordinate) { - retval = pci_enable_device(dev); - pci_set_master(dev); + if (atomic_read(&dev->enable_cnt) == 0) { + retval = pci_enable_device(dev); + pci_set_master(dev); + } pci_enable_bridges(dev->subordinate); } } diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index 1c1141801060..fbc63d5e459f 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -30,9 +30,8 @@ #include <linux/types.h> #include <linux/pci.h> #include <linux/pci_hotplug.h> +#include <linux/acpi.h> #include <linux/pci-acpi.h> -#include <acpi/acpi.h> -#include <acpi/acpi_bus.h> #define MY_NAME "acpi_pcihp" @@ -333,19 +332,14 @@ acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, { acpi_status status = AE_NOT_FOUND; acpi_handle handle, phandle; - struct pci_bus *pbus = bus; - struct pci_dev *pdev; - - do { - pdev = pbus->self; - if (!pdev) { - handle = acpi_get_pci_rootbridge_handle( - pci_domain_nr(pbus), pbus->number); + struct pci_bus *pbus; + + handle = NULL; + for (pbus = bus; pbus; pbus = pbus->parent) { + handle = acpi_pci_get_bridge_handle(pbus); + if (handle) break; - } - handle = DEVICE_ACPI_HANDLE(&(pdev->dev)); - pbus = pbus->parent; - } while (!handle); + } /* * _HPP settings apply to all child buses, until another _HPP is @@ -378,12 +372,10 @@ EXPORT_SYMBOL_GPL(acpi_get_hp_params_from_firmware); * * Attempt to take hotplug control from firmware. */ -int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags) +int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags) { acpi_status status; acpi_handle chandle, handle; - struct pci_dev *pdev = dev; - struct pci_bus *parent; struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL }; flags &= (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | @@ -408,33 +400,25 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags) acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); dbg("Trying to get hotplug control for %s\n", (char *)string.pointer); - status = pci_osc_control_set(handle, flags); + status = acpi_pci_osc_control_set(handle, flags); if (ACPI_SUCCESS(status)) goto got_one; kfree(string.pointer); string = (struct acpi_buffer){ ACPI_ALLOCATE_BUFFER, NULL }; } - pdev = dev; - handle = DEVICE_ACPI_HANDLE(&dev->dev); - while (!handle) { + handle = DEVICE_ACPI_HANDLE(&pdev->dev); + if (!handle) { /* * This hotplug controller was not listed in the ACPI name * space at all. Try to get acpi handle of parent pci bus. */ - if (!pdev || !pdev->bus->parent) - break; - parent = pdev->bus->parent; - dbg("Could not find %s in acpi namespace, trying parent\n", - pci_name(pdev)); - if (!parent->self) - /* Parent must be a host bridge */ - handle = acpi_get_pci_rootbridge_handle( - pci_domain_nr(parent), - parent->number); - else - handle = DEVICE_ACPI_HANDLE(&(parent->self->dev)); - pdev = parent->self; + struct pci_bus *pbus; + for (pbus = pdev->bus; pbus; pbus = pbus->parent) { + handle = acpi_pci_get_bridge_handle(pbus); + if (handle) + break; + } } while (handle) { @@ -453,13 +437,13 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags) } dbg("Cannot get control of hotplug hardware for pci %s\n", - pci_name(dev)); + pci_name(pdev)); kfree(string.pointer); return -ENODEV; got_one: - dbg("Gained control for hotplug HW for pci %s (%s)\n", pci_name(dev), - (char *)string.pointer); + dbg("Gained control for hotplug HW for pci %s (%s)\n", + pci_name(pdev), (char *)string.pointer); kfree(string.pointer); return 0; } diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index d8649e127298..6151389fd903 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -1,395 +1,163 @@ -/* - * Fake PCI Hot Plug Controller Driver +/* Works like the fakephp driver used to, except a little better. * - * Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com> - * Copyright (C) 2003 IBM Corp. - * Copyright (C) 2003 Rolf Eike Beer <eike-kernel@sf-tec.de> + * - It's possible to remove devices with subordinate busses. + * - New PCI devices that appear via any method, not just a fakephp triggered + * rescan, will be noticed. + * - Devices that are removed via any method, not just a fakephp triggered + * removal, will also be noticed. * - * Based on ideas and code from: - * Vladimir Kondratiev <vladimir.kondratiev@intel.com> - * Rolf Eike Beer <eike-kernel@sf-tec.de> + * Uses nothing from the pci-hotplug subsystem. * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 2 of the License. - * - * Send feedback to <greg@kroah.com> */ -/* - * - * This driver will "emulate" removing PCI devices from the system. If - * the "power" file is written to with "0" then the specified PCI device - * will be completely removed from the kernel. - * - * WARNING, this does NOT turn off the power to the PCI device. This is - * a "logical" removal, not a physical or electrical removal. - * - * Use this module at your own risk, you have been warned! - * - * Enabling PCI devices is left as an exercise for the reader... - * - */ -#include <linux/kernel.h> #include <linux/module.h> -#include <linux/pci.h> -#include <linux/pci_hotplug.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> #include <linux/init.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/workqueue.h> +#include <linux/pci.h> +#include <linux/device.h> #include "../pci.h" -#if !defined(MODULE) - #define MY_NAME "fakephp" -#else - #define MY_NAME THIS_MODULE->name -#endif - -#define dbg(format, arg...) \ - do { \ - if (debug) \ - printk(KERN_DEBUG "%s: " format, \ - MY_NAME , ## arg); \ - } while (0) -#define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME , ## arg) -#define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME , ## arg) - -#define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>" -#define DRIVER_DESC "Fake PCI Hot Plug Controller Driver" - -struct dummy_slot { - struct list_head node; - struct hotplug_slot *slot; - struct pci_dev *dev; - struct work_struct remove_work; - unsigned long removed; +struct legacy_slot { + struct kobject kobj; + struct pci_dev *dev; + struct list_head list; }; -static int debug; -static int dup_slots; -static LIST_HEAD(slot_list); -static struct workqueue_struct *dummyphp_wq; - -static void pci_rescan_worker(struct work_struct *work); -static DECLARE_WORK(pci_rescan_work, pci_rescan_worker); - -static int enable_slot (struct hotplug_slot *slot); -static int disable_slot (struct hotplug_slot *slot); +static LIST_HEAD(legacy_list); -static struct hotplug_slot_ops dummy_hotplug_slot_ops = { - .owner = THIS_MODULE, - .enable_slot = enable_slot, - .disable_slot = disable_slot, -}; - -static void dummy_release(struct hotplug_slot *slot) +static ssize_t legacy_show(struct kobject *kobj, struct attribute *attr, + char *buf) { - struct dummy_slot *dslot = slot->private; - - list_del(&dslot->node); - kfree(dslot->slot->info); - kfree(dslot->slot); - pci_dev_put(dslot->dev); - kfree(dslot); + struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); + strcpy(buf, "1\n"); + return 2; } -#define SLOT_NAME_SIZE 8 - -static int add_slot(struct pci_dev *dev) +static void remove_callback(void *data) { - struct dummy_slot *dslot; - struct hotplug_slot *slot; - char name[SLOT_NAME_SIZE]; - int retval = -ENOMEM; - static int count = 1; - - slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL); - if (!slot) - goto error; - - slot->info = kzalloc(sizeof(struct hotplug_slot_info), GFP_KERNEL); - if (!slot->info) - goto error_slot; - - slot->info->power_status = 1; - slot->info->max_bus_speed = PCI_SPEED_UNKNOWN; - slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN; - - dslot = kzalloc(sizeof(struct dummy_slot), GFP_KERNEL); - if (!dslot) - goto error_info; - - if (dup_slots) - snprintf(name, SLOT_NAME_SIZE, "fake"); - else - snprintf(name, SLOT_NAME_SIZE, "fake%d", count++); - dbg("slot->name = %s\n", name); - slot->ops = &dummy_hotplug_slot_ops; - slot->release = &dummy_release; - slot->private = dslot; - - retval = pci_hp_register(slot, dev->bus, PCI_SLOT(dev->devfn), name); - if (retval) { - err("pci_hp_register failed with error %d\n", retval); - goto error_dslot; - } - - dbg("slot->name = %s\n", hotplug_slot_name(slot)); - dslot->slot = slot; - dslot->dev = pci_dev_get(dev); - list_add (&dslot->node, &slot_list); - return retval; - -error_dslot: - kfree(dslot); -error_info: - kfree(slot->info); -error_slot: - kfree(slot); -error: - return retval; + pci_remove_bus_device((struct pci_dev *)data); } -static int __init pci_scan_buses(void) +static ssize_t legacy_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) { - struct pci_dev *dev = NULL; - int lastslot = 0; + struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); + unsigned long val; - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - if (PCI_FUNC(dev->devfn) > 0 && - lastslot == PCI_SLOT(dev->devfn)) - continue; - lastslot = PCI_SLOT(dev->devfn); - add_slot(dev); - } + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; - return 0; + if (val) + pci_rescan_bus(slot->dev->bus); + else + sysfs_schedule_callback(&slot->dev->dev.kobj, remove_callback, + slot->dev, THIS_MODULE); + return len; } -static void remove_slot(struct dummy_slot *dslot) -{ - int retval; - - dbg("removing slot %s\n", hotplug_slot_name(dslot->slot)); - retval = pci_hp_deregister(dslot->slot); - if (retval) - err("Problem unregistering a slot %s\n", - hotplug_slot_name(dslot->slot)); -} +static struct attribute *legacy_attrs[] = { + &(struct attribute){ .name = "power", .mode = 0644 }, + NULL, +}; -/* called from the single-threaded workqueue handler to remove a slot */ -static void remove_slot_worker(struct work_struct *work) +static void legacy_release(struct kobject *kobj) { - struct dummy_slot *dslot = - container_of(work, struct dummy_slot, remove_work); - remove_slot(dslot); -} + struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj); -/** - * pci_rescan_slot - Rescan slot - * @temp: Device template. Should be set: bus and devfn. - * - * Tries hard not to re-enable already existing devices; - * also handles scanning of subfunctions. - */ -static int pci_rescan_slot(struct pci_dev *temp) -{ - struct pci_bus *bus = temp->bus; - struct pci_dev *dev; - int func; - u8 hdr_type; - int count = 0; - - if (!pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type)) { - temp->hdr_type = hdr_type & 0x7f; - if ((dev = pci_get_slot(bus, temp->devfn)) != NULL) - pci_dev_put(dev); - else { - dev = pci_scan_single_device(bus, temp->devfn); - if (dev) { - dbg("New device on %s function %x:%x\n", - bus->name, temp->devfn >> 3, - temp->devfn & 7); - count++; - } - } - /* multifunction device? */ - if (!(hdr_type & 0x80)) - return count; - - /* continue scanning for other functions */ - for (func = 1, temp->devfn++; func < 8; func++, temp->devfn++) { - if (pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type)) - continue; - temp->hdr_type = hdr_type & 0x7f; - - if ((dev = pci_get_slot(bus, temp->devfn)) != NULL) - pci_dev_put(dev); - else { - dev = pci_scan_single_device(bus, temp->devfn); - if (dev) { - dbg("New device on %s function %x:%x\n", - bus->name, temp->devfn >> 3, - temp->devfn & 7); - count++; - } - } - } - } - - return count; + pci_dev_put(slot->dev); + kfree(slot); } +static struct kobj_type legacy_ktype = { + .sysfs_ops = &(struct sysfs_ops){ + .store = legacy_store, .show = legacy_show + }, + .release = &legacy_release, + .default_attrs = legacy_attrs, +}; -/** - * pci_rescan_bus - Rescan PCI bus - * @bus: the PCI bus to rescan - * - * Call pci_rescan_slot for each possible function of the bus. - */ -static void pci_rescan_bus(const struct pci_bus *bus) +static int legacy_add_slot(struct pci_dev *pdev) { - unsigned int devfn; - struct pci_dev *dev; - int retval; - int found = 0; - dev = alloc_pci_dev(); - if (!dev) - return; + struct legacy_slot *slot = kzalloc(sizeof(*slot), GFP_KERNEL); - dev->bus = (struct pci_bus*)bus; - dev->sysdata = bus->sysdata; - for (devfn = 0; devfn < 0x100; devfn += 8) { - dev->devfn = devfn; - found += pci_rescan_slot(dev); - } - - if (found) { - pci_bus_assign_resources(bus); - list_for_each_entry(dev, &bus->devices, bus_list) { - /* Skip already-added devices */ - if (dev->is_added) - continue; - retval = pci_bus_add_device(dev); - if (retval) - dev_err(&dev->dev, - "Error adding device, continuing\n"); - else - add_slot(dev); - } - pci_bus_add_devices(bus); - } - kfree(dev); -} + if (!slot) + return -ENOMEM; -/* recursively scan all buses */ -static void pci_rescan_buses(const struct list_head *list) -{ - const struct list_head *l; - list_for_each(l,list) { - const struct pci_bus *b = pci_bus_b(l); - pci_rescan_bus(b); - pci_rescan_buses(&b->children); + if (kobject_init_and_add(&slot->kobj, &legacy_ktype, + &pci_slots_kset->kobj, "%s", + dev_name(&pdev->dev))) { + dev_warn(&pdev->dev, "Failed to created legacy fake slot\n"); + return -EINVAL; } -} + slot->dev = pci_dev_get(pdev); -/* initiate rescan of all pci buses */ -static inline void pci_rescan(void) { - pci_rescan_buses(&pci_root_buses); -} - -/* called from the single-threaded workqueue handler to rescan all pci buses */ -static void pci_rescan_worker(struct work_struct *work) -{ - pci_rescan(); -} + list_add(&slot->list, &legacy_list); -static int enable_slot(struct hotplug_slot *hotplug_slot) -{ - /* mis-use enable_slot for rescanning of the pci bus */ - cancel_work_sync(&pci_rescan_work); - queue_work(dummyphp_wq, &pci_rescan_work); return 0; } -static int disable_slot(struct hotplug_slot *slot) +static int legacy_notify(struct notifier_block *nb, + unsigned long action, void *data) { - struct dummy_slot *dslot; - struct pci_dev *dev; - int func; - - if (!slot) - return -ENODEV; - dslot = slot->private; - - dbg("%s - physical_slot = %s\n", __func__, hotplug_slot_name(slot)); + struct pci_dev *pdev = to_pci_dev(data); - for (func = 7; func >= 0; func--) { - dev = pci_get_slot(dslot->dev->bus, dslot->dev->devfn + func); - if (!dev) - continue; + if (action == BUS_NOTIFY_ADD_DEVICE) { + legacy_add_slot(pdev); + } else if (action == BUS_NOTIFY_DEL_DEVICE) { + struct legacy_slot *slot; - if (test_and_set_bit(0, &dslot->removed)) { - dbg("Slot already scheduled for removal\n"); - pci_dev_put(dev); - return -ENODEV; - } + list_for_each_entry(slot, &legacy_list, list) + if (slot->dev == pdev) + goto found; - /* remove the device from the pci core */ - pci_remove_bus_device(dev); - - /* queue work item to blow away this sysfs entry and other - * parts. - */ - INIT_WORK(&dslot->remove_work, remove_slot_worker); - queue_work(dummyphp_wq, &dslot->remove_work); - - pci_dev_put(dev); + dev_warn(&pdev->dev, "Missing legacy fake slot?"); + return -ENODEV; +found: + kobject_del(&slot->kobj); + list_del(&slot->list); + kobject_put(&slot->kobj); } + return 0; } -static void cleanup_slots (void) -{ - struct list_head *tmp; - struct list_head *next; - struct dummy_slot *dslot; - - destroy_workqueue(dummyphp_wq); - list_for_each_safe (tmp, next, &slot_list) { - dslot = list_entry (tmp, struct dummy_slot, node); - remove_slot(dslot); - } - -} +static struct notifier_block legacy_notifier = { + .notifier_call = legacy_notify +}; -static int __init dummyphp_init(void) +static int __init init_legacy(void) { - info(DRIVER_DESC "\n"); + struct pci_dev *pdev = NULL; - dummyphp_wq = create_singlethread_workqueue(MY_NAME); - if (!dummyphp_wq) - return -ENOMEM; + /* Add existing devices */ + while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) + legacy_add_slot(pdev); - return pci_scan_buses(); + /* Be alerted of any new ones */ + bus_register_notifier(&pci_bus_type, &legacy_notifier); + return 0; } +module_init(init_legacy); - -static void __exit dummyphp_exit(void) +static void __exit remove_legacy(void) { - cleanup_slots(); + struct legacy_slot *slot, *tmp; + + bus_unregister_notifier(&pci_bus_type, &legacy_notifier); + + list_for_each_entry_safe(slot, tmp, &legacy_list, list) { + list_del(&slot->list); + kobject_del(&slot->kobj); + kobject_put(&slot->kobj); + } } +module_exit(remove_legacy); -module_init(dummyphp_init); -module_exit(dummyphp_exit); -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_AUTHOR("Trent Piepho <xyzzy@speakeasy.org>"); +MODULE_DESCRIPTION("Legacy version of the fakephp interface"); MODULE_LICENSE("GPL"); -module_param(debug, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(debug, "Debugging mode enabled or not"); -module_param(dup_slots, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(dup_slots, "Force duplicate slot names for debugging"); diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 39ae37589fda..0a368547e633 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -46,10 +46,10 @@ extern int pciehp_force; extern struct workqueue_struct *pciehp_wq; #define dbg(format, arg...) \ - do { \ - if (pciehp_debug) \ - printk("%s: " format, MY_NAME , ## arg); \ - } while (0) +do { \ + if (pciehp_debug) \ + printk(KERN_DEBUG "%s: " format, MY_NAME , ## arg); \ +} while (0) #define err(format, arg...) \ printk(KERN_ERR "%s: " format, MY_NAME , ## arg) #define info(format, arg...) \ @@ -60,7 +60,7 @@ extern struct workqueue_struct *pciehp_wq; #define ctrl_dbg(ctrl, format, arg...) \ do { \ if (pciehp_debug) \ - dev_printk(, &ctrl->pcie->device, \ + dev_printk(KERN_DEBUG, &ctrl->pcie->device, \ format, ## arg); \ } while (0) #define ctrl_err(ctrl, format, arg...) \ @@ -108,10 +108,11 @@ struct controller { u32 slot_cap; u8 cap_base; struct timer_list poll_timer; - int cmd_busy; + unsigned int cmd_busy:1; unsigned int no_cmd_complete:1; unsigned int link_active_reporting:1; unsigned int notification_enabled:1; + unsigned int power_fault_detected; }; #define INT_BUTTON_IGNORE 0 diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index 438d795f9fe3..96048010e7d9 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -67,37 +67,27 @@ static int __init parse_detect_mode(void) return PCIEHP_DETECT_DEFAULT; } -static struct pcie_port_service_id __initdata port_pci_ids[] = { - { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .port_type = PCIE_ANY_PORT, - .service_type = PCIE_PORT_SERVICE_HP, - .driver_data = 0, - }, { /* end: all zeroes */ } -}; - static int __initdata dup_slot_id; static int __initdata acpi_slot_detected; static struct list_head __initdata dummy_slots = LIST_HEAD_INIT(dummy_slots); /* Dummy driver for dumplicate name detection */ -static int __init dummy_probe(struct pcie_device *dev, - const struct pcie_port_service_id *id) +static int __init dummy_probe(struct pcie_device *dev) { int pos; u32 slot_cap; struct slot *slot, *tmp; struct pci_dev *pdev = dev->port; struct pci_bus *pbus = pdev->subordinate; - if (!(slot = kzalloc(sizeof(*slot), GFP_KERNEL))) - return -ENOMEM; /* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */ if (pciehp_get_hp_hw_control_from_firmware(pdev)) return -ENODEV; if (!(pos = pci_find_capability(pdev, PCI_CAP_ID_EXP))) return -ENODEV; pci_read_config_dword(pdev, pos + PCI_EXP_SLTCAP, &slot_cap); + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) + return -ENOMEM; slot->number = slot_cap >> 19; list_for_each_entry(tmp, &dummy_slots, slot_list) { if (tmp->number == slot->number) @@ -111,7 +101,8 @@ static int __init dummy_probe(struct pcie_device *dev, static struct pcie_port_service_driver __initdata dummy_driver = { .name = "pciehp_dummy", - .id_table = port_pci_ids, + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_HP, .probe = dummy_probe, }; diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 681e3912b821..fb254b2454de 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -401,7 +401,7 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe return 0; } -static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_id *id) +static int pciehp_probe(struct pcie_device *dev) { int rc; struct controller *ctrl; @@ -475,7 +475,7 @@ static void pciehp_remove (struct pcie_device *dev) } #ifdef CONFIG_PM -static int pciehp_suspend (struct pcie_device *dev, pm_message_t state) +static int pciehp_suspend (struct pcie_device *dev) { dev_info(&dev->device, "%s ENTRY\n", __func__); return 0; @@ -503,20 +503,12 @@ static int pciehp_resume (struct pcie_device *dev) } return 0; } -#endif - -static struct pcie_port_service_id port_pci_ids[] = { { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .port_type = PCIE_ANY_PORT, - .service_type = PCIE_PORT_SERVICE_HP, - .driver_data = 0, - }, { /* end: all zeroes */ } -}; +#endif /* PM */ static struct pcie_port_service_driver hpdriver_portdrv = { .name = PCIE_MODULE_NAME, - .id_table = &port_pci_ids[0], + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_HP, .probe = pciehp_probe, .remove = pciehp_remove, diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 7a16c6897bb9..07bd32151146 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -548,23 +548,21 @@ static int hpc_power_on_slot(struct slot * slot) slot_cmd = POWER_ON; cmd_mask = PCI_EXP_SLTCTL_PCC; - /* Enable detection that we turned off at slot power-off time */ if (!pciehp_poll_mode) { - slot_cmd |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | - PCI_EXP_SLTCTL_PDCE); - cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | - PCI_EXP_SLTCTL_PDCE); + /* Enable power fault detection turned off at power off time */ + slot_cmd |= PCI_EXP_SLTCTL_PFDE; + cmd_mask |= PCI_EXP_SLTCTL_PFDE; } retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); - if (retval) { ctrl_err(ctrl, "Write %x command failed!\n", slot_cmd); - return -1; + return retval; } ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); + ctrl->power_fault_detected = 0; return retval; } @@ -621,18 +619,10 @@ static int hpc_power_off_slot(struct slot * slot) slot_cmd = POWER_OFF; cmd_mask = PCI_EXP_SLTCTL_PCC; - /* - * If we get MRL or presence detect interrupts now, the isr - * will notice the sticky power-fault bit too and issue power - * indicator change commands. This will lead to an endless loop - * of command completions, since the power-fault bit remains on - * till the slot is powered on again. - */ if (!pciehp_poll_mode) { - slot_cmd &= ~(PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | - PCI_EXP_SLTCTL_PDCE); - cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | - PCI_EXP_SLTCTL_PDCE); + /* Disable power fault detection */ + slot_cmd &= ~PCI_EXP_SLTCTL_PFDE; + cmd_mask |= PCI_EXP_SLTCTL_PFDE; } retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); @@ -672,10 +662,11 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) detected &= (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD | PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC); + detected &= ~intr_loc; intr_loc |= detected; if (!intr_loc) return IRQ_NONE; - if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, detected)) { + if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, intr_loc)) { ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS\n", __func__); return IRQ_NONE; @@ -709,9 +700,10 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) pciehp_handle_presence_change(p_slot); /* Check Power Fault Detected */ - if (intr_loc & PCI_EXP_SLTSTA_PFD) + if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) { + ctrl->power_fault_detected = 1; pciehp_handle_power_fault(p_slot); - + } return IRQ_HANDLED; } diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index 6aba0b6cf2e0..974e924ca96d 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -48,10 +48,10 @@ extern int shpchp_debug; extern struct workqueue_struct *shpchp_wq; #define dbg(format, arg...) \ - do { \ - if (shpchp_debug) \ - printk("%s: " format, MY_NAME , ## arg); \ - } while (0) +do { \ + if (shpchp_debug) \ + printk(KERN_DEBUG "%s: " format, MY_NAME , ## arg); \ +} while (0) #define err(format, arg...) \ printk(KERN_ERR "%s: " format, MY_NAME , ## arg) #define info(format, arg...) \ @@ -62,7 +62,7 @@ extern struct workqueue_struct *shpchp_wq; #define ctrl_dbg(ctrl, format, arg...) \ do { \ if (shpchp_debug) \ - dev_printk(, &ctrl->pci_dev->dev, \ + dev_printk(KERN_DEBUG, &ctrl->pci_dev->dev, \ format, ## arg); \ } while (0) #define ctrl_err(ctrl, format, arg...) \ diff --git a/drivers/pci/hotplug/shpchp_pci.c b/drivers/pci/hotplug/shpchp_pci.c index 138f161becc0..aa315e52529b 100644 --- a/drivers/pci/hotplug/shpchp_pci.c +++ b/drivers/pci/hotplug/shpchp_pci.c @@ -137,7 +137,7 @@ int __ref shpchp_configure_device(struct slot *p_slot) busnr)) break; } - if (busnr >= end) { + if (busnr > end) { ctrl_err(ctrl, "No free bus for hot-added bridge\n"); pci_dev_put(dev); diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 49402c399232..9dbd5066acaf 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1782,7 +1782,7 @@ static inline void iommu_prepare_isa(void) ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); if (ret) - printk("IOMMU: Failed to create 0-64M identity map, " + printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, " "floppy might not work\n"); } diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c new file mode 100644 index 000000000000..7227efc760db --- /dev/null +++ b/drivers/pci/iov.c @@ -0,0 +1,680 @@ +/* + * drivers/pci/iov.c + * + * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com> + * + * PCI Express I/O Virtualization (IOV) support. + * Single Root IOV 1.0 + */ + +#include <linux/pci.h> +#include <linux/mutex.h> +#include <linux/string.h> +#include <linux/delay.h> +#include "pci.h" + +#define VIRTFN_ID_LEN 16 + +static inline u8 virtfn_bus(struct pci_dev *dev, int id) +{ + return dev->bus->number + ((dev->devfn + dev->sriov->offset + + dev->sriov->stride * id) >> 8); +} + +static inline u8 virtfn_devfn(struct pci_dev *dev, int id) +{ + return (dev->devfn + dev->sriov->offset + + dev->sriov->stride * id) & 0xff; +} + +static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) +{ + int rc; + struct pci_bus *child; + + if (bus->number == busnr) + return bus; + + child = pci_find_bus(pci_domain_nr(bus), busnr); + if (child) + return child; + + child = pci_add_new_bus(bus, NULL, busnr); + if (!child) + return NULL; + + child->subordinate = busnr; + child->dev.parent = bus->bridge; + rc = pci_bus_add_child(child); + if (rc) { + pci_remove_bus(child); + return NULL; + } + + return child; +} + +static void virtfn_remove_bus(struct pci_bus *bus, int busnr) +{ + struct pci_bus *child; + + if (bus->number == busnr) + return; + + child = pci_find_bus(pci_domain_nr(bus), busnr); + BUG_ON(!child); + + if (list_empty(&child->devices)) + pci_remove_bus(child); +} + +static int virtfn_add(struct pci_dev *dev, int id, int reset) +{ + int i; + int rc; + u64 size; + char buf[VIRTFN_ID_LEN]; + struct pci_dev *virtfn; + struct resource *res; + struct pci_sriov *iov = dev->sriov; + + virtfn = alloc_pci_dev(); + if (!virtfn) + return -ENOMEM; + + mutex_lock(&iov->dev->sriov->lock); + virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id)); + if (!virtfn->bus) { + kfree(virtfn); + mutex_unlock(&iov->dev->sriov->lock); + return -ENOMEM; + } + virtfn->devfn = virtfn_devfn(dev, id); + virtfn->vendor = dev->vendor; + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device); + pci_setup_device(virtfn); + virtfn->dev.parent = dev->dev.parent; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + if (!res->parent) + continue; + virtfn->resource[i].name = pci_name(virtfn); + virtfn->resource[i].flags = res->flags; + size = resource_size(res); + do_div(size, iov->total); + virtfn->resource[i].start = res->start + size * id; + virtfn->resource[i].end = virtfn->resource[i].start + size - 1; + rc = request_resource(res, &virtfn->resource[i]); + BUG_ON(rc); + } + + if (reset) + pci_execute_reset_function(virtfn); + + pci_device_add(virtfn, virtfn->bus); + mutex_unlock(&iov->dev->sriov->lock); + + virtfn->physfn = pci_dev_get(dev); + virtfn->is_virtfn = 1; + + rc = pci_bus_add_device(virtfn); + if (rc) + goto failed1; + sprintf(buf, "virtfn%u", id); + rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf); + if (rc) + goto failed1; + rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn"); + if (rc) + goto failed2; + + kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE); + + return 0; + +failed2: + sysfs_remove_link(&dev->dev.kobj, buf); +failed1: + pci_dev_put(dev); + mutex_lock(&iov->dev->sriov->lock); + pci_remove_bus_device(virtfn); + virtfn_remove_bus(dev->bus, virtfn_bus(dev, id)); + mutex_unlock(&iov->dev->sriov->lock); + + return rc; +} + +static void virtfn_remove(struct pci_dev *dev, int id, int reset) +{ + char buf[VIRTFN_ID_LEN]; + struct pci_bus *bus; + struct pci_dev *virtfn; + struct pci_sriov *iov = dev->sriov; + + bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id)); + if (!bus) + return; + + virtfn = pci_get_slot(bus, virtfn_devfn(dev, id)); + if (!virtfn) + return; + + pci_dev_put(virtfn); + + if (reset) { + device_release_driver(&virtfn->dev); + pci_execute_reset_function(virtfn); + } + + sprintf(buf, "virtfn%u", id); + sysfs_remove_link(&dev->dev.kobj, buf); + sysfs_remove_link(&virtfn->dev.kobj, "physfn"); + + mutex_lock(&iov->dev->sriov->lock); + pci_remove_bus_device(virtfn); + virtfn_remove_bus(dev->bus, virtfn_bus(dev, id)); + mutex_unlock(&iov->dev->sriov->lock); + + pci_dev_put(dev); +} + +static int sriov_migration(struct pci_dev *dev) +{ + u16 status; + struct pci_sriov *iov = dev->sriov; + + if (!iov->nr_virtfn) + return 0; + + if (!(iov->cap & PCI_SRIOV_CAP_VFM)) + return 0; + + pci_read_config_word(dev, iov->pos + PCI_SRIOV_STATUS, &status); + if (!(status & PCI_SRIOV_STATUS_VFM)) + return 0; + + schedule_work(&iov->mtask); + + return 1; +} + +static void sriov_migration_task(struct work_struct *work) +{ + int i; + u8 state; + u16 status; + struct pci_sriov *iov = container_of(work, struct pci_sriov, mtask); + + for (i = iov->initial; i < iov->nr_virtfn; i++) { + state = readb(iov->mstate + i); + if (state == PCI_SRIOV_VFM_MI) { + writeb(PCI_SRIOV_VFM_AV, iov->mstate + i); + state = readb(iov->mstate + i); + if (state == PCI_SRIOV_VFM_AV) + virtfn_add(iov->self, i, 1); + } else if (state == PCI_SRIOV_VFM_MO) { + virtfn_remove(iov->self, i, 1); + writeb(PCI_SRIOV_VFM_UA, iov->mstate + i); + state = readb(iov->mstate + i); + if (state == PCI_SRIOV_VFM_AV) + virtfn_add(iov->self, i, 0); + } + } + + pci_read_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, &status); + status &= ~PCI_SRIOV_STATUS_VFM; + pci_write_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, status); +} + +static int sriov_enable_migration(struct pci_dev *dev, int nr_virtfn) +{ + int bir; + u32 table; + resource_size_t pa; + struct pci_sriov *iov = dev->sriov; + + if (nr_virtfn <= iov->initial) + return 0; + + pci_read_config_dword(dev, iov->pos + PCI_SRIOV_VFM, &table); + bir = PCI_SRIOV_VFM_BIR(table); + if (bir > PCI_STD_RESOURCE_END) + return -EIO; + + table = PCI_SRIOV_VFM_OFFSET(table); + if (table + nr_virtfn > pci_resource_len(dev, bir)) + return -EIO; + + pa = pci_resource_start(dev, bir) + table; + iov->mstate = ioremap(pa, nr_virtfn); + if (!iov->mstate) + return -ENOMEM; + + INIT_WORK(&iov->mtask, sriov_migration_task); + + iov->ctrl |= PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR; + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + + return 0; +} + +static void sriov_disable_migration(struct pci_dev *dev) +{ + struct pci_sriov *iov = dev->sriov; + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + + cancel_work_sync(&iov->mtask); + iounmap(iov->mstate); +} + +static int sriov_enable(struct pci_dev *dev, int nr_virtfn) +{ + int rc; + int i, j; + int nres; + u16 offset, stride, initial; + struct resource *res; + struct pci_dev *pdev; + struct pci_sriov *iov = dev->sriov; + + if (!nr_virtfn) + return 0; + + if (iov->nr_virtfn) + return -EINVAL; + + pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial); + if (initial > iov->total || + (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total))) + return -EIO; + + if (nr_virtfn < 0 || nr_virtfn > iov->total || + (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial))) + return -EINVAL; + + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride); + if (!offset || (nr_virtfn > 1 && !stride)) + return -EIO; + + nres = 0; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + if (res->parent) + nres++; + } + if (nres != iov->nres) { + dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n"); + return -ENOMEM; + } + + iov->offset = offset; + iov->stride = stride; + + if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) { + dev_err(&dev->dev, "SR-IOV: bus number out of range\n"); + return -ENOMEM; + } + + if (iov->link != dev->devfn) { + pdev = pci_get_slot(dev->bus, iov->link); + if (!pdev) + return -ENODEV; + + pci_dev_put(pdev); + + if (!pdev->is_physfn) + return -ENODEV; + + rc = sysfs_create_link(&dev->dev.kobj, + &pdev->dev.kobj, "dep_link"); + if (rc) + return rc; + } + + iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + msleep(100); + pci_unblock_user_cfg_access(dev); + + iov->initial = initial; + if (nr_virtfn < initial) + initial = nr_virtfn; + + for (i = 0; i < initial; i++) { + rc = virtfn_add(dev, i, 0); + if (rc) + goto failed; + } + + if (iov->cap & PCI_SRIOV_CAP_VFM) { + rc = sriov_enable_migration(dev, nr_virtfn); + if (rc) + goto failed; + } + + kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE); + iov->nr_virtfn = nr_virtfn; + + return 0; + +failed: + for (j = 0; j < i; j++) + virtfn_remove(dev, j, 0); + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + ssleep(1); + pci_unblock_user_cfg_access(dev); + + if (iov->link != dev->devfn) + sysfs_remove_link(&dev->dev.kobj, "dep_link"); + + return rc; +} + +static void sriov_disable(struct pci_dev *dev) +{ + int i; + struct pci_sriov *iov = dev->sriov; + + if (!iov->nr_virtfn) + return; + + if (iov->cap & PCI_SRIOV_CAP_VFM) + sriov_disable_migration(dev); + + for (i = 0; i < iov->nr_virtfn; i++) + virtfn_remove(dev, i, 0); + + iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); + pci_block_user_cfg_access(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + ssleep(1); + pci_unblock_user_cfg_access(dev); + + if (iov->link != dev->devfn) + sysfs_remove_link(&dev->dev.kobj, "dep_link"); + + iov->nr_virtfn = 0; +} + +static int sriov_init(struct pci_dev *dev, int pos) +{ + int i; + int rc; + int nres; + u32 pgsz; + u16 ctrl, total, offset, stride; + struct pci_sriov *iov; + struct resource *res; + struct pci_dev *pdev; + + if (dev->pcie_type != PCI_EXP_TYPE_RC_END && + dev->pcie_type != PCI_EXP_TYPE_ENDPOINT) + return -ENODEV; + + pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl); + if (ctrl & PCI_SRIOV_CTRL_VFE) { + pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0); + ssleep(1); + } + + pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total); + if (!total) + return 0; + + ctrl = 0; + list_for_each_entry(pdev, &dev->bus->devices, bus_list) + if (pdev->is_physfn) + goto found; + + pdev = NULL; + if (pci_ari_enabled(dev->bus)) + ctrl |= PCI_SRIOV_CTRL_ARI; + +found: + pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl); + pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, total); + pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset); + pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride); + if (!offset || (total > 1 && !stride)) + return -EIO; + + pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz); + i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0; + pgsz &= ~((1 << i) - 1); + if (!pgsz) + return -EIO; + + pgsz &= ~(pgsz - 1); + pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz); + + nres = 0; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + i += __pci_read_base(dev, pci_bar_unknown, res, + pos + PCI_SRIOV_BAR + i * 4); + if (!res->flags) + continue; + if (resource_size(res) & (PAGE_SIZE - 1)) { + rc = -EIO; + goto failed; + } + res->end = res->start + resource_size(res) * total - 1; + nres++; + } + + iov = kzalloc(sizeof(*iov), GFP_KERNEL); + if (!iov) { + rc = -ENOMEM; + goto failed; + } + + iov->pos = pos; + iov->nres = nres; + iov->ctrl = ctrl; + iov->total = total; + iov->offset = offset; + iov->stride = stride; + iov->pgsz = pgsz; + iov->self = dev; + pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap); + pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link); + + if (pdev) + iov->dev = pci_dev_get(pdev); + else { + iov->dev = dev; + mutex_init(&iov->lock); + } + + dev->sriov = iov; + dev->is_physfn = 1; + + return 0; + +failed: + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + res->flags = 0; + } + + return rc; +} + +static void sriov_release(struct pci_dev *dev) +{ + BUG_ON(dev->sriov->nr_virtfn); + + if (dev == dev->sriov->dev) + mutex_destroy(&dev->sriov->lock); + else + pci_dev_put(dev->sriov->dev); + + kfree(dev->sriov); + dev->sriov = NULL; +} + +static void sriov_restore_state(struct pci_dev *dev) +{ + int i; + u16 ctrl; + struct pci_sriov *iov = dev->sriov; + + pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl); + if (ctrl & PCI_SRIOV_CTRL_VFE) + return; + + for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++) + pci_update_resource(dev, i); + + pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + if (iov->ctrl & PCI_SRIOV_CTRL_VFE) + msleep(100); +} + +/** + * pci_iov_init - initialize the IOV capability + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +int pci_iov_init(struct pci_dev *dev) +{ + int pos; + + if (!dev->is_pcie) + return -ENODEV; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); + if (pos) + return sriov_init(dev, pos); + + return -ENODEV; +} + +/** + * pci_iov_release - release resources used by the IOV capability + * @dev: the PCI device + */ +void pci_iov_release(struct pci_dev *dev) +{ + if (dev->is_physfn) + sriov_release(dev); +} + +/** + * pci_iov_resource_bar - get position of the SR-IOV BAR + * @dev: the PCI device + * @resno: the resource number + * @type: the BAR type to be filled in + * + * Returns position of the BAR encapsulated in the SR-IOV capability. + */ +int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type) +{ + if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END) + return 0; + + BUG_ON(!dev->is_physfn); + + *type = pci_bar_unknown; + + return dev->sriov->pos + PCI_SRIOV_BAR + + 4 * (resno - PCI_IOV_RESOURCES); +} + +/** + * pci_restore_iov_state - restore the state of the IOV capability + * @dev: the PCI device + */ +void pci_restore_iov_state(struct pci_dev *dev) +{ + if (dev->is_physfn) + sriov_restore_state(dev); +} + +/** + * pci_iov_bus_range - find bus range used by Virtual Function + * @bus: the PCI bus + * + * Returns max number of buses (exclude current one) used by Virtual + * Functions. + */ +int pci_iov_bus_range(struct pci_bus *bus) +{ + int max = 0; + u8 busnr; + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + if (!dev->is_physfn) + continue; + busnr = virtfn_bus(dev, dev->sriov->total - 1); + if (busnr > max) + max = busnr; + } + + return max ? max - bus->number : 0; +} + +/** + * pci_enable_sriov - enable the SR-IOV capability + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + */ +int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) +{ + might_sleep(); + + if (!dev->is_physfn) + return -ENODEV; + + return sriov_enable(dev, nr_virtfn); +} +EXPORT_SYMBOL_GPL(pci_enable_sriov); + +/** + * pci_disable_sriov - disable the SR-IOV capability + * @dev: the PCI device + */ +void pci_disable_sriov(struct pci_dev *dev) +{ + might_sleep(); + + if (!dev->is_physfn) + return; + + sriov_disable(dev); +} +EXPORT_SYMBOL_GPL(pci_disable_sriov); + +/** + * pci_sriov_migration - notify SR-IOV core of Virtual Function Migration + * @dev: the PCI device + * + * Returns IRQ_HANDLED if the IRQ is handled, or IRQ_NONE if not. + * + * Physical Function driver is responsible to register IRQ handler using + * VF Migration Interrupt Message Number, and call this function when the + * interrupt is generated by the hardware. + */ +irqreturn_t pci_sriov_migration(struct pci_dev *dev) +{ + if (!dev->is_physfn) + return IRQ_NONE; + + return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE; +} +EXPORT_SYMBOL_GPL(pci_sriov_migration); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index baba2eb5367d..6f2e6295e773 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -27,48 +27,53 @@ static int pci_msi_enable = 1; /* Arch hooks */ -int __attribute__ ((weak)) -arch_msi_check_device(struct pci_dev *dev, int nvec, int type) +#ifndef arch_msi_check_device +int arch_msi_check_device(struct pci_dev *dev, int nvec, int type) { return 0; } +#endif -int __attribute__ ((weak)) -arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *entry) -{ - return 0; -} - -int __attribute__ ((weak)) -arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +#ifndef arch_setup_msi_irqs +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { struct msi_desc *entry; int ret; + /* + * If an architecture wants to support multiple MSI, it needs to + * override arch_setup_msi_irqs() + */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + list_for_each_entry(entry, &dev->msi_list, list) { ret = arch_setup_msi_irq(dev, entry); - if (ret) + if (ret < 0) return ret; + if (ret > 0) + return -ENOSPC; } return 0; } +#endif -void __attribute__ ((weak)) arch_teardown_msi_irq(unsigned int irq) -{ - return; -} - -void __attribute__ ((weak)) -arch_teardown_msi_irqs(struct pci_dev *dev) +#ifndef arch_teardown_msi_irqs +void arch_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; list_for_each_entry(entry, &dev->msi_list, list) { - if (entry->irq != 0) - arch_teardown_msi_irq(entry->irq); + int i, nvec; + if (entry->irq == 0) + continue; + nvec = 1 << entry->msi_attrib.multiple; + for (i = 0; i < nvec; i++) + arch_teardown_msi_irq(entry->irq + i); } } +#endif static void __msi_set_enable(struct pci_dev *dev, int pos, int enable) { @@ -111,27 +116,14 @@ static inline __attribute_const__ u32 msi_mask(unsigned x) return (1 << (1 << x)) - 1; } -static void msix_flush_writes(struct irq_desc *desc) +static inline __attribute_const__ u32 msi_capable_mask(u16 control) { - struct msi_desc *entry; + return msi_mask((control >> 1) & 7); +} - entry = get_irq_desc_msi(desc); - BUG_ON(!entry || !entry->dev); - switch (entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - /* nothing to do */ - break; - case PCI_CAP_ID_MSIX: - { - int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; - readl(entry->mask_base + offset); - break; - } - default: - BUG(); - break; - } +static inline __attribute_const__ u32 msi_enabled_mask(u16 control) +{ + return msi_mask((control >> 4) & 7); } /* @@ -143,49 +135,71 @@ static void msix_flush_writes(struct irq_desc *desc) * Returns 1 if it succeeded in masking the interrupt and 0 if the device * doesn't support MSI masking. */ -static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) +static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) { - struct msi_desc *entry; + u32 mask_bits = desc->masked; - entry = get_irq_desc_msi(desc); - BUG_ON(!entry || !entry->dev); - switch (entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - if (entry->msi_attrib.maskbit) { - int pos; - u32 mask_bits; - - pos = (long)entry->mask_base; - pci_read_config_dword(entry->dev, pos, &mask_bits); - mask_bits &= ~(mask); - mask_bits |= flag & mask; - pci_write_config_dword(entry->dev, pos, mask_bits); - } else { - return 0; - } - break; - case PCI_CAP_ID_MSIX: - { - int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; - writel(flag, entry->mask_base + offset); - readl(entry->mask_base + offset); - break; - } - default: - BUG(); - break; + if (!desc->msi_attrib.maskbit) + return; + + mask_bits &= ~mask; + mask_bits |= flag; + pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits); + desc->masked = mask_bits; +} + +/* + * This internal function does not flush PCI writes to the device. + * All users must ensure that they read from the device before either + * assuming that the device state is up to date, or returning out of this + * file. This saves a few milliseconds when initialising devices with lots + * of MSI-X interrupts. + */ +static void msix_mask_irq(struct msi_desc *desc, u32 flag) +{ + u32 mask_bits = desc->masked; + unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; + mask_bits &= ~1; + mask_bits |= flag; + writel(mask_bits, desc->mask_base + offset); + desc->masked = mask_bits; +} + +static void msi_set_mask_bit(unsigned irq, u32 flag) +{ + struct msi_desc *desc = get_irq_msi(irq); + + if (desc->msi_attrib.is_msix) { + msix_mask_irq(desc, flag); + readl(desc->mask_base); /* Flush write to device */ + } else { + unsigned offset = irq - desc->dev->irq; + msi_mask_irq(desc, 1 << offset, flag << offset); } - entry->msi_attrib.masked = !!flag; - return 1; +} + +void mask_msi_irq(unsigned int irq) +{ + msi_set_mask_bit(irq, 1); +} + +void unmask_msi_irq(unsigned int irq) +{ + msi_set_mask_bit(irq, 0); } void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) { struct msi_desc *entry = get_irq_desc_msi(desc); - switch(entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - { + if (entry->msi_attrib.is_msix) { + void __iomem *base = entry->mask_base + + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + + msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); + msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); + msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET); + } else { struct pci_dev *dev = entry->dev; int pos = entry->msi_attrib.pos; u16 data; @@ -201,21 +215,6 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) pci_read_config_word(dev, msi_data_reg(pos, 0), &data); } msg->data = data; - break; - } - case PCI_CAP_ID_MSIX: - { - void __iomem *base; - base = entry->mask_base + - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; - - msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); - msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); - msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET); - break; - } - default: - BUG(); } } @@ -229,11 +228,25 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg) void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) { struct msi_desc *entry = get_irq_desc_msi(desc); - switch (entry->msi_attrib.type) { - case PCI_CAP_ID_MSI: - { + if (entry->msi_attrib.is_msix) { + void __iomem *base; + base = entry->mask_base + + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + + writel(msg->address_lo, + base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); + writel(msg->address_hi, + base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); + writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET); + } else { struct pci_dev *dev = entry->dev; int pos = entry->msi_attrib.pos; + u16 msgctl; + + pci_read_config_word(dev, msi_control_reg(pos), &msgctl); + msgctl &= ~PCI_MSI_FLAGS_QSIZE; + msgctl |= entry->msi_attrib.multiple << 4; + pci_write_config_word(dev, msi_control_reg(pos), msgctl); pci_write_config_dword(dev, msi_lower_address_reg(pos), msg->address_lo); @@ -246,23 +259,6 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) pci_write_config_word(dev, msi_data_reg(pos, 0), msg->data); } - break; - } - case PCI_CAP_ID_MSIX: - { - void __iomem *base; - base = entry->mask_base + - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; - - writel(msg->address_lo, - base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); - writel(msg->address_hi, - base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); - writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET); - break; - } - default: - BUG(); } entry->msg = *msg; } @@ -274,37 +270,18 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg) write_msi_msg_desc(desc, msg); } -void mask_msi_irq(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - - msi_set_mask_bits(desc, 1, 1); - msix_flush_writes(desc); -} - -void unmask_msi_irq(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - - msi_set_mask_bits(desc, 1, 0); - msix_flush_writes(desc); -} - static int msi_free_irqs(struct pci_dev* dev); -static struct msi_desc* alloc_msi_entry(void) +static struct msi_desc *alloc_msi_entry(struct pci_dev *dev) { - struct msi_desc *entry; - - entry = kzalloc(sizeof(struct msi_desc), GFP_KERNEL); - if (!entry) + struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) return NULL; - INIT_LIST_HEAD(&entry->list); - entry->irq = 0; - entry->dev = NULL; + INIT_LIST_HEAD(&desc->list); + desc->dev = dev; - return entry; + return desc; } static void pci_intx_for_msi(struct pci_dev *dev, int enable) @@ -328,15 +305,11 @@ static void __pci_restore_msi_state(struct pci_dev *dev) pci_intx_for_msi(dev, 0); msi_set_enable(dev, 0); write_msi_msg(dev->irq, &entry->msg); - if (entry->msi_attrib.maskbit) { - struct irq_desc *desc = irq_to_desc(dev->irq); - msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask, - entry->msi_attrib.masked); - } pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); + msi_mask_irq(entry, msi_capable_mask(control), entry->masked); control &= ~PCI_MSI_FLAGS_QSIZE; - control |= PCI_MSI_FLAGS_ENABLE; + control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE; pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control); } @@ -354,9 +327,8 @@ static void __pci_restore_msix_state(struct pci_dev *dev) msix_set_enable(dev, 0); list_for_each_entry(entry, &dev->msi_list, list) { - struct irq_desc *desc = irq_to_desc(entry->irq); write_msi_msg(entry->irq, &entry->msg); - msi_set_mask_bits(desc, 1, entry->msi_attrib.masked); + msix_mask_irq(entry, entry->masked); } BUG_ON(list_empty(&dev->msi_list)); @@ -378,52 +350,48 @@ EXPORT_SYMBOL_GPL(pci_restore_msi_state); /** * msi_capability_init - configure device's MSI capability structure * @dev: pointer to the pci_dev data structure of MSI device function + * @nvec: number of interrupts to allocate * - * Setup the MSI capability structure of device function with a single - * MSI irq, regardless of device function is capable of handling - * multiple messages. A return of zero indicates the successful setup - * of an entry zero with the new MSI irq or non-zero for otherwise. - **/ -static int msi_capability_init(struct pci_dev *dev) + * Setup the MSI capability structure of the device with the requested + * number of interrupts. A return value of zero indicates the successful + * setup of an entry with the new MSI irq. A negative return value indicates + * an error, and a positive return value indicates the number of interrupts + * which could have been allocated. + */ +static int msi_capability_init(struct pci_dev *dev, int nvec) { struct msi_desc *entry; int pos, ret; u16 control; + unsigned mask; msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */ pos = pci_find_capability(dev, PCI_CAP_ID_MSI); pci_read_config_word(dev, msi_control_reg(pos), &control); /* MSI Entry Initialization */ - entry = alloc_msi_entry(); + entry = alloc_msi_entry(dev); if (!entry) return -ENOMEM; - entry->msi_attrib.type = PCI_CAP_ID_MSI; + entry->msi_attrib.is_msix = 0; entry->msi_attrib.is_64 = is_64bit_address(control); entry->msi_attrib.entry_nr = 0; entry->msi_attrib.maskbit = is_mask_bit_support(control); - entry->msi_attrib.masked = 1; entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ entry->msi_attrib.pos = pos; - entry->dev = dev; - if (entry->msi_attrib.maskbit) { - unsigned int base, maskbits, temp; - - base = msi_mask_bits_reg(pos, entry->msi_attrib.is_64); - entry->mask_base = (void __iomem *)(long)base; - - /* All MSIs are unmasked by default, Mask them all */ - pci_read_config_dword(dev, base, &maskbits); - temp = msi_mask((control & PCI_MSI_FLAGS_QMASK) >> 1); - maskbits |= temp; - pci_write_config_dword(dev, base, maskbits); - entry->msi_attrib.maskbits_mask = temp; - } + + entry->mask_pos = msi_mask_bits_reg(pos, entry->msi_attrib.is_64); + /* All MSIs are unmasked by default, Mask them all */ + if (entry->msi_attrib.maskbit) + pci_read_config_dword(dev, entry->mask_pos, &entry->masked); + mask = msi_capable_mask(control); + msi_mask_irq(entry, mask, mask); + list_add_tail(&entry->list, &dev->msi_list); /* Configure MSI capability structure */ - ret = arch_setup_msi_irqs(dev, 1, PCI_CAP_ID_MSI); + ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); if (ret) { msi_free_irqs(dev); return ret; @@ -476,26 +444,28 @@ static int msix_capability_init(struct pci_dev *dev, /* MSI-X Table Initialization */ for (i = 0; i < nvec; i++) { - entry = alloc_msi_entry(); + entry = alloc_msi_entry(dev); if (!entry) break; j = entries[i].entry; - entry->msi_attrib.type = PCI_CAP_ID_MSIX; + entry->msi_attrib.is_msix = 1; entry->msi_attrib.is_64 = 1; entry->msi_attrib.entry_nr = j; - entry->msi_attrib.maskbit = 1; - entry->msi_attrib.masked = 1; entry->msi_attrib.default_irq = dev->irq; entry->msi_attrib.pos = pos; - entry->dev = dev; entry->mask_base = base; + entry->masked = readl(base + j * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); + msix_mask_irq(entry, 1); list_add_tail(&entry->list, &dev->msi_list); } ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX); - if (ret) { + if (ret < 0) { + /* If we had some success report the number of irqs + * we succeeded in setting up. */ int avail = 0; list_for_each_entry(entry, &dev->msi_list, list) { if (entry->irq != 0) { @@ -503,14 +473,13 @@ static int msix_capability_init(struct pci_dev *dev, } } - msi_free_irqs(dev); + if (avail != 0) + ret = avail; + } - /* If we had some success report the number of irqs - * we succeeded in setting up. - */ - if (avail == 0) - avail = ret; - return avail; + if (ret) { + msi_free_irqs(dev); + return ret; } i = 0; @@ -575,39 +544,54 @@ static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type) } /** - * pci_enable_msi - configure device's MSI capability structure - * @dev: pointer to the pci_dev data structure of MSI device function + * pci_enable_msi_block - configure device's MSI capability structure + * @dev: device to configure + * @nvec: number of interrupts to configure * - * Setup the MSI capability structure of device function with - * a single MSI irq upon its software driver call to request for - * MSI mode enabled on its hardware device function. A return of zero - * indicates the successful setup of an entry zero with the new MSI - * irq or non-zero for otherwise. - **/ -int pci_enable_msi(struct pci_dev* dev) + * Allocate IRQs for a device with the MSI capability. + * This function returns a negative errno if an error occurs. If it + * is unable to allocate the number of interrupts requested, it returns + * the number of interrupts it might be able to allocate. If it successfully + * allocates at least the number of interrupts requested, it returns 0 and + * updates the @dev's irq member to the lowest new interrupt number; the + * other interrupt numbers allocated to this device are consecutive. + */ +int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) { - int status; + int status, pos, maxvec; + u16 msgctl; - status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI); + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + if (!pos) + return -EINVAL; + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); + maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); + if (nvec > maxvec) + return maxvec; + + status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI); if (status) return status; WARN_ON(!!dev->msi_enabled); - /* Check whether driver already requested for MSI-X irqs */ + /* Check whether driver already requested MSI-X irqs */ if (dev->msix_enabled) { dev_info(&dev->dev, "can't enable MSI " "(MSI-X already enabled)\n"); return -EINVAL; } - status = msi_capability_init(dev); + + status = msi_capability_init(dev, nvec); return status; } -EXPORT_SYMBOL(pci_enable_msi); +EXPORT_SYMBOL(pci_enable_msi_block); -void pci_msi_shutdown(struct pci_dev* dev) +void pci_msi_shutdown(struct pci_dev *dev) { - struct msi_desc *entry; + struct msi_desc *desc; + u32 mask; + u16 ctrl; if (!pci_msi_enable || !dev || !dev->msi_enabled) return; @@ -617,19 +601,15 @@ void pci_msi_shutdown(struct pci_dev* dev) dev->msi_enabled = 0; BUG_ON(list_empty(&dev->msi_list)); - entry = list_entry(dev->msi_list.next, struct msi_desc, list); - /* Return the the pci reset with msi irqs unmasked */ - if (entry->msi_attrib.maskbit) { - u32 mask = entry->msi_attrib.maskbits_mask; - struct irq_desc *desc = irq_to_desc(dev->irq); - msi_set_mask_bits(desc, mask, ~mask); - } - if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) - return; + desc = list_first_entry(&dev->msi_list, struct msi_desc, list); + pci_read_config_word(dev, desc->msi_attrib.pos + PCI_MSI_FLAGS, &ctrl); + mask = msi_capable_mask(ctrl); + msi_mask_irq(desc, mask, ~mask); /* Restore dev->irq to its default pin-assertion irq */ - dev->irq = entry->msi_attrib.default_irq; + dev->irq = desc->msi_attrib.default_irq; } + void pci_disable_msi(struct pci_dev* dev) { struct msi_desc *entry; @@ -640,7 +620,7 @@ void pci_disable_msi(struct pci_dev* dev) pci_msi_shutdown(dev); entry = list_entry(dev->msi_list.next, struct msi_desc, list); - if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) + if (entry->msi_attrib.is_msix) return; msi_free_irqs(dev); @@ -652,14 +632,18 @@ static int msi_free_irqs(struct pci_dev* dev) struct msi_desc *entry, *tmp; list_for_each_entry(entry, &dev->msi_list, list) { - if (entry->irq) - BUG_ON(irq_has_action(entry->irq)); + int i, nvec; + if (!entry->irq) + continue; + nvec = 1 << entry->msi_attrib.multiple; + for (i = 0; i < nvec; i++) + BUG_ON(irq_has_action(entry->irq + i)); } arch_teardown_msi_irqs(dev); list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) { - if (entry->msi_attrib.type == PCI_CAP_ID_MSIX) { + if (entry->msi_attrib.is_msix) { writel(1, entry->mask_base + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); @@ -675,6 +659,23 @@ static int msi_free_irqs(struct pci_dev* dev) } /** + * pci_msix_table_size - return the number of device's MSI-X table entries + * @dev: pointer to the pci_dev data structure of MSI-X device function + */ +int pci_msix_table_size(struct pci_dev *dev) +{ + int pos; + u16 control; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + if (!pos) + return 0; + + pci_read_config_word(dev, msi_control_reg(pos), &control); + return multi_msix_capable(control); +} + +/** * pci_enable_msix - configure device's MSI-X capability structure * @dev: pointer to the pci_dev data structure of MSI-X device function * @entries: pointer to an array of MSI-X entries @@ -691,9 +692,8 @@ static int msi_free_irqs(struct pci_dev* dev) **/ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) { - int status, pos, nr_entries; + int status, nr_entries; int i, j; - u16 control; if (!entries) return -EINVAL; @@ -702,9 +702,7 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) if (status) return status; - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); - pci_read_config_word(dev, msi_control_reg(pos), &control); - nr_entries = multi_msix_capable(control); + nr_entries = pci_msix_table_size(dev); if (nvec > nr_entries) return -EINVAL; diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h index 3898f5237144..71f4df2ef654 100644 --- a/drivers/pci/msi.h +++ b/drivers/pci/msi.h @@ -20,14 +20,8 @@ #define msi_mask_bits_reg(base, is64bit) \ ( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4) #define msi_disable(control) control &= ~PCI_MSI_FLAGS_ENABLE -#define multi_msi_capable(control) \ - (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1)) -#define multi_msi_enable(control, num) \ - control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE); #define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT)) #define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT)) -#define msi_enable(control, num) multi_msi_enable(control, num); \ - control |= PCI_MSI_FLAGS_ENABLE #define msix_table_offset_reg(base) (base + 0x04) #define msix_pba_offset_reg(base) (base + 0x08) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index deea8a187eb8..fac5eddcefd2 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -18,221 +18,6 @@ #include <linux/pci-acpi.h> #include "pci.h" -struct acpi_osc_data { - acpi_handle handle; - u32 support_set; - u32 control_set; - u32 control_query; - int is_queried; - struct list_head sibiling; -}; -static LIST_HEAD(acpi_osc_data_list); - -struct acpi_osc_args { - u32 capbuf[3]; -}; - -static DEFINE_MUTEX(pci_acpi_lock); - -static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle) -{ - struct acpi_osc_data *data; - - list_for_each_entry(data, &acpi_osc_data_list, sibiling) { - if (data->handle == handle) - return data; - } - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return NULL; - INIT_LIST_HEAD(&data->sibiling); - data->handle = handle; - list_add_tail(&data->sibiling, &acpi_osc_data_list); - return data; -} - -static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, - 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; - -static acpi_status acpi_run_osc(acpi_handle handle, - struct acpi_osc_args *osc_args, u32 *retval) -{ - acpi_status status; - struct acpi_object_list input; - union acpi_object in_params[4]; - struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; - union acpi_object *out_obj; - u32 errors, flags = osc_args->capbuf[OSC_QUERY_TYPE]; - - /* Setting up input parameters */ - input.count = 4; - input.pointer = in_params; - in_params[0].type = ACPI_TYPE_BUFFER; - in_params[0].buffer.length = 16; - in_params[0].buffer.pointer = OSC_UUID; - in_params[1].type = ACPI_TYPE_INTEGER; - in_params[1].integer.value = 1; - in_params[2].type = ACPI_TYPE_INTEGER; - in_params[2].integer.value = 3; - in_params[3].type = ACPI_TYPE_BUFFER; - in_params[3].buffer.length = 12; - in_params[3].buffer.pointer = (u8 *)osc_args->capbuf; - - status = acpi_evaluate_object(handle, "_OSC", &input, &output); - if (ACPI_FAILURE(status)) - return status; - - if (!output.length) - return AE_NULL_OBJECT; - - out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_BUFFER) { - printk(KERN_DEBUG "Evaluate _OSC returns wrong type\n"); - status = AE_TYPE; - goto out_kfree; - } - /* Need to ignore the bit0 in result code */ - errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); - if (errors) { - if (errors & OSC_REQUEST_ERROR) - printk(KERN_DEBUG "_OSC request fails\n"); - if (errors & OSC_INVALID_UUID_ERROR) - printk(KERN_DEBUG "_OSC invalid UUID\n"); - if (errors & OSC_INVALID_REVISION_ERROR) - printk(KERN_DEBUG "_OSC invalid revision\n"); - if (errors & OSC_CAPABILITIES_MASK_ERROR) { - if (flags & OSC_QUERY_ENABLE) - goto out_success; - printk(KERN_DEBUG "_OSC FW not grant req. control\n"); - status = AE_SUPPORT; - goto out_kfree; - } - status = AE_ERROR; - goto out_kfree; - } -out_success: - *retval = *((u32 *)(out_obj->buffer.pointer + 8)); - status = AE_OK; - -out_kfree: - kfree(output.pointer); - return status; -} - -static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data) -{ - acpi_status status; - u32 support_set, result; - struct acpi_osc_args osc_args; - - /* do _OSC query for all possible controls */ - support_set = osc_data->support_set | (flags & OSC_SUPPORT_MASKS); - osc_args.capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; - osc_args.capbuf[OSC_SUPPORT_TYPE] = support_set; - osc_args.capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; - - status = acpi_run_osc(osc_data->handle, &osc_args, &result); - if (ACPI_SUCCESS(status)) { - osc_data->support_set = support_set; - osc_data->control_query = result; - osc_data->is_queried = 1; - } - - return status; -} - -/* - * pci_acpi_osc_support: Invoke _OSC indicating support for the given feature - * @flags: Bitmask of flags to support - * - * See the ACPI spec for the definition of the flags - */ -int pci_acpi_osc_support(acpi_handle handle, u32 flags) -{ - acpi_status status; - acpi_handle tmp; - struct acpi_osc_data *osc_data; - int rc = 0; - - status = acpi_get_handle(handle, "_OSC", &tmp); - if (ACPI_FAILURE(status)) - return -ENOTTY; - - mutex_lock(&pci_acpi_lock); - osc_data = acpi_get_osc_data(handle); - if (!osc_data) { - printk(KERN_ERR "acpi osc data array is full\n"); - rc = -ENOMEM; - goto out; - } - - __acpi_query_osc(flags, osc_data); -out: - mutex_unlock(&pci_acpi_lock); - return rc; -} - -/** - * pci_osc_control_set - commit requested control to Firmware - * @handle: acpi_handle for the target ACPI object - * @flags: driver's requested control bits - * - * Attempt to take control from Firmware on requested control bits. - **/ -acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) -{ - acpi_status status; - u32 control_req, control_set, result; - acpi_handle tmp; - struct acpi_osc_data *osc_data; - struct acpi_osc_args osc_args; - - status = acpi_get_handle(handle, "_OSC", &tmp); - if (ACPI_FAILURE(status)) - return status; - - mutex_lock(&pci_acpi_lock); - osc_data = acpi_get_osc_data(handle); - if (!osc_data) { - printk(KERN_ERR "acpi osc data array is full\n"); - status = AE_ERROR; - goto out; - } - - control_req = (flags & OSC_CONTROL_MASKS); - if (!control_req) { - status = AE_TYPE; - goto out; - } - - /* No need to evaluate _OSC if the control was already granted. */ - if ((osc_data->control_set & control_req) == control_req) - goto out; - - if (!osc_data->is_queried) { - status = __acpi_query_osc(osc_data->support_set, osc_data); - if (ACPI_FAILURE(status)) - goto out; - } - - if ((osc_data->control_query & control_req) != control_req) { - status = AE_SUPPORT; - goto out; - } - - control_set = osc_data->control_set | control_req; - osc_args.capbuf[OSC_QUERY_TYPE] = 0; - osc_args.capbuf[OSC_SUPPORT_TYPE] = osc_data->support_set; - osc_args.capbuf[OSC_CONTROL_TYPE] = control_set; - status = acpi_run_osc(handle, &osc_args, &result); - if (ACPI_SUCCESS(status)) - osc_data->control_set = result; -out: - mutex_unlock(&pci_acpi_lock); - return status; -} -EXPORT_SYMBOL(pci_osc_control_set); - /* * _SxD returns the D-state with the highest power * (lowest D-state number) supported in the S-state "x". diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 267de88551c9..c0cbbb5a245e 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -99,6 +99,52 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count) } static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id); +/** + * store_remove_id - remove a PCI device ID from this driver + * @driver: target device driver + * @buf: buffer for scanning device ID data + * @count: input size + * + * Removes a dynamic pci device ID to this driver. + */ +static ssize_t +store_remove_id(struct device_driver *driver, const char *buf, size_t count) +{ + struct pci_dynid *dynid, *n; + struct pci_driver *pdrv = to_pci_driver(driver); + __u32 vendor, device, subvendor = PCI_ANY_ID, + subdevice = PCI_ANY_ID, class = 0, class_mask = 0; + int fields = 0; + int retval = -ENODEV; + + fields = sscanf(buf, "%x %x %x %x %x %x", + &vendor, &device, &subvendor, &subdevice, + &class, &class_mask); + if (fields < 2) + return -EINVAL; + + spin_lock(&pdrv->dynids.lock); + list_for_each_entry_safe(dynid, n, &pdrv->dynids.list, node) { + struct pci_device_id *id = &dynid->id; + if ((id->vendor == vendor) && + (id->device == device) && + (subvendor == PCI_ANY_ID || id->subvendor == subvendor) && + (subdevice == PCI_ANY_ID || id->subdevice == subdevice) && + !((id->class ^ class) & class_mask)) { + list_del(&dynid->node); + kfree(dynid); + retval = 0; + break; + } + } + spin_unlock(&pdrv->dynids.lock); + + if (retval) + return retval; + return count; +} +static DRIVER_ATTR(remove_id, S_IWUSR, NULL, store_remove_id); + static void pci_free_dynids(struct pci_driver *drv) { @@ -125,6 +171,20 @@ static void pci_remove_newid_file(struct pci_driver *drv) { driver_remove_file(&drv->driver, &driver_attr_new_id); } + +static int +pci_create_removeid_file(struct pci_driver *drv) +{ + int error = 0; + if (drv->probe != NULL) + error = driver_create_file(&drv->driver,&driver_attr_remove_id); + return error; +} + +static void pci_remove_removeid_file(struct pci_driver *drv) +{ + driver_remove_file(&drv->driver, &driver_attr_remove_id); +} #else /* !CONFIG_HOTPLUG */ static inline void pci_free_dynids(struct pci_driver *drv) {} static inline int pci_create_newid_file(struct pci_driver *drv) @@ -132,6 +192,11 @@ static inline int pci_create_newid_file(struct pci_driver *drv) return 0; } static inline void pci_remove_newid_file(struct pci_driver *drv) {} +static inline int pci_create_removeid_file(struct pci_driver *drv) +{ + return 0; +} +static inline void pci_remove_removeid_file(struct pci_driver *drv) {} #endif /** @@ -899,13 +964,23 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner, /* register with core */ error = driver_register(&drv->driver); if (error) - return error; + goto out; error = pci_create_newid_file(drv); if (error) - driver_unregister(&drv->driver); + goto out_newid; + error = pci_create_removeid_file(drv); + if (error) + goto out_removeid; +out: return error; + +out_removeid: + pci_remove_newid_file(drv); +out_newid: + driver_unregister(&drv->driver); + goto out; } /** @@ -921,6 +996,7 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner, void pci_unregister_driver(struct pci_driver *drv) { + pci_remove_removeid_file(drv); pci_remove_newid_file(drv); driver_unregister(&drv->driver); pci_free_dynids(drv); @@ -1020,6 +1096,7 @@ struct bus_type pci_bus_type = { .remove = pci_device_remove, .shutdown = pci_device_shutdown, .dev_attrs = pci_dev_attrs, + .bus_attrs = pci_bus_attrs, .pm = PCI_PM_OPS_PTR, }; diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index dfc4e0ddf241..e9a8706a6401 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -219,6 +219,83 @@ msi_bus_store(struct device *dev, struct device_attribute *attr, return count; } +#ifdef CONFIG_HOTPLUG +static DEFINE_MUTEX(pci_remove_rescan_mutex); +static ssize_t bus_rescan_store(struct bus_type *bus, const char *buf, + size_t count) +{ + unsigned long val; + struct pci_bus *b = NULL; + + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; + + if (val) { + mutex_lock(&pci_remove_rescan_mutex); + while ((b = pci_find_next_bus(b)) != NULL) + pci_rescan_bus(b); + mutex_unlock(&pci_remove_rescan_mutex); + } + return count; +} + +struct bus_attribute pci_bus_attrs[] = { + __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, bus_rescan_store), + __ATTR_NULL +}; + +static ssize_t +dev_rescan_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val; + struct pci_dev *pdev = to_pci_dev(dev); + + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; + + if (val) { + mutex_lock(&pci_remove_rescan_mutex); + pci_rescan_bus(pdev->bus); + mutex_unlock(&pci_remove_rescan_mutex); + } + return count; +} + +static void remove_callback(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + mutex_lock(&pci_remove_rescan_mutex); + pci_remove_bus_device(pdev); + mutex_unlock(&pci_remove_rescan_mutex); +} + +static ssize_t +remove_store(struct device *dev, struct device_attribute *dummy, + const char *buf, size_t count) +{ + int ret = 0; + unsigned long val; + struct pci_dev *pdev = to_pci_dev(dev); + + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; + + if (pci_is_root_bus(pdev->bus)) + return -EBUSY; + + /* An attribute cannot be unregistered by one of its own methods, + * so we have to use this roundabout approach. + */ + if (val) + ret = device_schedule_callback(dev, remove_callback); + if (ret) + count = ret; + return count; +} +#endif + struct device_attribute pci_dev_attrs[] = { __ATTR_RO(resource), __ATTR_RO(vendor), @@ -237,10 +314,25 @@ struct device_attribute pci_dev_attrs[] = { __ATTR(broken_parity_status,(S_IRUGO|S_IWUSR), broken_parity_status_show,broken_parity_status_store), __ATTR(msi_bus, 0644, msi_bus_show, msi_bus_store), +#ifdef CONFIG_HOTPLUG + __ATTR(remove, (S_IWUSR|S_IWGRP), NULL, remove_store), + __ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_rescan_store), +#endif __ATTR_NULL, }; static ssize_t +boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return sprintf(buf, "%u\n", + !!(pdev->resource[PCI_ROM_RESOURCE].flags & + IORESOURCE_ROM_SHADOW)); +} +struct device_attribute vga_attr = __ATTR_RO(boot_vga); + +static ssize_t pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { @@ -493,6 +585,19 @@ pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr, } /** + * pci_adjust_legacy_attr - adjustment of legacy file attributes + * @b: bus to create files under + * @mmap_type: I/O port or memory + * + * Stub implementation. Can be overridden by arch if necessary. + */ +void __weak +pci_adjust_legacy_attr(struct pci_bus *b, enum pci_mmap_state mmap_type) +{ + return; +} + +/** * pci_create_legacy_files - create legacy I/O port and memory files * @b: bus to create files under * @@ -518,6 +623,7 @@ void pci_create_legacy_files(struct pci_bus *b) b->legacy_io->read = pci_read_legacy_io; b->legacy_io->write = pci_write_legacy_io; b->legacy_io->mmap = pci_mmap_legacy_io; + pci_adjust_legacy_attr(b, pci_mmap_io); error = device_create_bin_file(&b->dev, b->legacy_io); if (error) goto legacy_io_err; @@ -528,6 +634,7 @@ void pci_create_legacy_files(struct pci_bus *b) b->legacy_mem->size = 1024*1024; b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR; b->legacy_mem->mmap = pci_mmap_legacy_mem; + pci_adjust_legacy_attr(b, pci_mmap_mem); error = device_create_bin_file(&b->dev, b->legacy_mem); if (error) goto legacy_mem_err; @@ -719,8 +826,8 @@ static int pci_create_resource_files(struct pci_dev *pdev) return 0; } #else /* !HAVE_PCI_MMAP */ -static inline int pci_create_resource_files(struct pci_dev *dev) { return 0; } -static inline void pci_remove_resource_files(struct pci_dev *dev) { return; } +int __weak pci_create_resource_files(struct pci_dev *dev) { return 0; } +void __weak pci_remove_resource_files(struct pci_dev *dev) { return; } #endif /* HAVE_PCI_MMAP */ /** @@ -884,18 +991,27 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev) pdev->rom_attr = attr; } + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) { + retval = device_create_file(&pdev->dev, &vga_attr); + if (retval) + goto err_rom_file; + } + /* add platform-specific attributes */ retval = pcibios_add_platform_entries(pdev); if (retval) - goto err_rom_file; + goto err_vga_file; /* add sysfs entries for various capabilities */ retval = pci_create_capabilities_sysfs(pdev); if (retval) - goto err_rom_file; + goto err_vga_file; return 0; +err_vga_file: + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + device_remove_file(&pdev->dev, &vga_attr); err_rom_file: if (rom_size) { sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 0195066251e5..fe7ac2cea7c9 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -20,6 +20,8 @@ #include <linux/pm_wakeup.h> #include <linux/interrupt.h> #include <asm/dma.h> /* isa_dma_bridge_buggy */ +#include <linux/device.h> +#include <asm/setup.h> #include "pci.h" unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT; @@ -677,6 +679,8 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state) EXPORT_SYMBOL(pci_choose_state); +#define PCI_EXP_SAVE_REGS 7 + static int pci_save_pcie_state(struct pci_dev *dev) { int pos, i = 0; @@ -689,7 +693,7 @@ static int pci_save_pcie_state(struct pci_dev *dev) save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP); if (!save_state) { - dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__); + dev_err(&dev->dev, "buffer not found in %s\n", __func__); return -ENOMEM; } cap = (u16 *)&save_state->data[0]; @@ -698,6 +702,9 @@ static int pci_save_pcie_state(struct pci_dev *dev) pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]); pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]); pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_LNKCTL2, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_SLTCTL2, &cap[i++]); return 0; } @@ -718,6 +725,9 @@ static void pci_restore_pcie_state(struct pci_dev *dev) pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]); pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]); pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]); + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, cap[i++]); + pci_write_config_word(dev, pos + PCI_EXP_LNKCTL2, cap[i++]); + pci_write_config_word(dev, pos + PCI_EXP_SLTCTL2, cap[i++]); } @@ -732,7 +742,7 @@ static int pci_save_pcix_state(struct pci_dev *dev) save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX); if (!save_state) { - dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__); + dev_err(&dev->dev, "buffer not found in %s\n", __func__); return -ENOMEM; } @@ -805,6 +815,7 @@ pci_restore_state(struct pci_dev *dev) } pci_restore_pcix_state(dev); pci_restore_msi_state(dev); + pci_restore_iov_state(dev); return 0; } @@ -1401,7 +1412,8 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) { int error; - error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP, 4 * sizeof(u16)); + error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP, + PCI_EXP_SAVE_REGS * sizeof(u16)); if (error) dev_err(&dev->dev, "unable to preallocate PCI Express save buffer\n"); @@ -1472,7 +1484,7 @@ pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge) if (!pin) return -1; - while (dev->bus->self) { + while (dev->bus->parent) { pin = pci_swizzle_interrupt_pin(dev, pin); dev = dev->bus->self; } @@ -1492,7 +1504,7 @@ u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp) { u8 pin = *pinp; - while (dev->bus->self) { + while (dev->bus->parent) { pin = pci_swizzle_interrupt_pin(dev, pin); dev = dev->bus->self; } @@ -2016,18 +2028,24 @@ static int __pcie_flr(struct pci_dev *dev, int probe) pci_block_user_cfg_access(dev); /* Wait for Transaction Pending bit clean */ + pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); + if (!(status & PCI_EXP_DEVSTA_TRPND)) + goto transaction_done; + msleep(100); pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); - if (status & PCI_EXP_DEVSTA_TRPND) { - dev_info(&dev->dev, "Busy after 100ms while trying to reset; " + if (!(status & PCI_EXP_DEVSTA_TRPND)) + goto transaction_done; + + dev_info(&dev->dev, "Busy after 100ms while trying to reset; " "sleeping for 1 second\n"); - ssleep(1); - pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); - if (status & PCI_EXP_DEVSTA_TRPND) - dev_info(&dev->dev, "Still busy after 1s; " + ssleep(1); + pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status); + if (status & PCI_EXP_DEVSTA_TRPND) + dev_info(&dev->dev, "Still busy after 1s; " "proceeding with reset anyway\n"); - } +transaction_done: pci_write_config_word(dev, exppos + PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR); mdelay(100); @@ -2054,18 +2072,24 @@ static int __pci_af_flr(struct pci_dev *dev, int probe) pci_block_user_cfg_access(dev); /* Wait for Transaction Pending bit clean */ + pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status); + if (!(status & PCI_AF_STATUS_TP)) + goto transaction_done; + msleep(100); pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status); - if (status & PCI_AF_STATUS_TP) { - dev_info(&dev->dev, "Busy after 100ms while trying to" - " reset; sleeping for 1 second\n"); - ssleep(1); - pci_read_config_byte(dev, - cappos + PCI_AF_STATUS, &status); - if (status & PCI_AF_STATUS_TP) - dev_info(&dev->dev, "Still busy after 1s; " - "proceeding with reset anyway\n"); - } + if (!(status & PCI_AF_STATUS_TP)) + goto transaction_done; + + dev_info(&dev->dev, "Busy after 100ms while trying to" + " reset; sleeping for 1 second\n"); + ssleep(1); + pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status); + if (status & PCI_AF_STATUS_TP) + dev_info(&dev->dev, "Still busy after 1s; " + "proceeding with reset anyway\n"); + +transaction_done: pci_write_config_byte(dev, cappos + PCI_AF_CTRL, PCI_AF_CTRL_FLR); mdelay(100); @@ -2334,18 +2358,140 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags) */ int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type) { + int reg; + if (resno < PCI_ROM_RESOURCE) { *type = pci_bar_unknown; return PCI_BASE_ADDRESS_0 + 4 * resno; } else if (resno == PCI_ROM_RESOURCE) { *type = pci_bar_mem32; return dev->rom_base_reg; + } else if (resno < PCI_BRIDGE_RESOURCES) { + /* device specific resource */ + reg = pci_iov_resource_bar(dev, resno, type); + if (reg) + return reg; } dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno); return 0; } +#define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE +static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0}; +spinlock_t resource_alignment_lock = SPIN_LOCK_UNLOCKED; + +/** + * pci_specified_resource_alignment - get resource alignment specified by user. + * @dev: the PCI device to get + * + * RETURNS: Resource alignment if it is specified. + * Zero if it is not specified. + */ +resource_size_t pci_specified_resource_alignment(struct pci_dev *dev) +{ + int seg, bus, slot, func, align_order, count; + resource_size_t align = 0; + char *p; + + spin_lock(&resource_alignment_lock); + p = resource_alignment_param; + while (*p) { + count = 0; + if (sscanf(p, "%d%n", &align_order, &count) == 1 && + p[count] == '@') { + p += count + 1; + } else { + align_order = -1; + } + if (sscanf(p, "%x:%x:%x.%x%n", + &seg, &bus, &slot, &func, &count) != 4) { + seg = 0; + if (sscanf(p, "%x:%x.%x%n", + &bus, &slot, &func, &count) != 3) { + /* Invalid format */ + printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: %s\n", + p); + break; + } + } + p += count; + if (seg == pci_domain_nr(dev->bus) && + bus == dev->bus->number && + slot == PCI_SLOT(dev->devfn) && + func == PCI_FUNC(dev->devfn)) { + if (align_order == -1) { + align = PAGE_SIZE; + } else { + align = 1 << align_order; + } + /* Found */ + break; + } + if (*p != ';' && *p != ',') { + /* End of param or invalid format */ + break; + } + p++; + } + spin_unlock(&resource_alignment_lock); + return align; +} + +/** + * pci_is_reassigndev - check if specified PCI is target device to reassign + * @dev: the PCI device to check + * + * RETURNS: non-zero for PCI device is a target device to reassign, + * or zero is not. + */ +int pci_is_reassigndev(struct pci_dev *dev) +{ + return (pci_specified_resource_alignment(dev) != 0); +} + +ssize_t pci_set_resource_alignment_param(const char *buf, size_t count) +{ + if (count > RESOURCE_ALIGNMENT_PARAM_SIZE - 1) + count = RESOURCE_ALIGNMENT_PARAM_SIZE - 1; + spin_lock(&resource_alignment_lock); + strncpy(resource_alignment_param, buf, count); + resource_alignment_param[count] = '\0'; + spin_unlock(&resource_alignment_lock); + return count; +} + +ssize_t pci_get_resource_alignment_param(char *buf, size_t size) +{ + size_t count; + spin_lock(&resource_alignment_lock); + count = snprintf(buf, size, "%s", resource_alignment_param); + spin_unlock(&resource_alignment_lock); + return count; +} + +static ssize_t pci_resource_alignment_show(struct bus_type *bus, char *buf) +{ + return pci_get_resource_alignment_param(buf, PAGE_SIZE); +} + +static ssize_t pci_resource_alignment_store(struct bus_type *bus, + const char *buf, size_t count) +{ + return pci_set_resource_alignment_param(buf, count); +} + +BUS_ATTR(resource_alignment, 0644, pci_resource_alignment_show, + pci_resource_alignment_store); + +static int __init pci_resource_alignment_sysfs_init(void) +{ + return bus_create_file(&pci_bus_type, + &bus_attr_resource_alignment); +} + +late_initcall(pci_resource_alignment_sysfs_init); + static void __devinit pci_no_domains(void) { #ifdef CONFIG_PCI_DOMAINS @@ -2394,6 +2540,9 @@ static int __init pci_setup(char *str) pci_cardbus_io_size = memparse(str + 9, &str); } else if (!strncmp(str, "cbmemsize=", 10)) { pci_cardbus_mem_size = memparse(str + 10, &str); + } else if (!strncmp(str, "resource_alignment=", 19)) { + pci_set_resource_alignment_param(str + 19, + strlen(str + 19)); } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 149fff65891f..d03f6b99f292 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -1,6 +1,8 @@ #ifndef DRIVERS_PCI_H #define DRIVERS_PCI_H +#include <linux/workqueue.h> + #define PCI_CFG_SPACE_SIZE 256 #define PCI_CFG_SPACE_EXP_SIZE 4096 @@ -135,6 +137,12 @@ extern int pcie_mch_quirk; extern struct device_attribute pci_dev_attrs[]; extern struct device_attribute dev_attr_cpuaffinity; extern struct device_attribute dev_attr_cpulistaffinity; +#ifdef CONFIG_HOTPLUG +extern struct bus_attribute pci_bus_attrs[]; +#else +#define pci_bus_attrs NULL +#endif + /** * pci_match_one_device - Tell if a PCI device structure has a matching @@ -177,6 +185,7 @@ enum pci_bar_type { pci_bar_mem64, /* A 64-bit memory BAR */ }; +extern int pci_setup_device(struct pci_dev *dev); extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int reg); extern int pci_resource_bar(struct pci_dev *dev, int resno, @@ -194,4 +203,60 @@ static inline int pci_ari_enabled(struct pci_bus *bus) return bus->self && bus->self->ari_enabled; } +#ifdef CONFIG_PCI_QUIRKS +extern int pci_is_reassigndev(struct pci_dev *dev); +resource_size_t pci_specified_resource_alignment(struct pci_dev *dev); +extern void pci_disable_bridge_window(struct pci_dev *dev); +#endif + +/* Single Root I/O Virtualization */ +struct pci_sriov { + int pos; /* capability position */ + int nres; /* number of resources */ + u32 cap; /* SR-IOV Capabilities */ + u16 ctrl; /* SR-IOV Control */ + u16 total; /* total VFs associated with the PF */ + u16 initial; /* initial VFs associated with the PF */ + u16 nr_virtfn; /* number of VFs available */ + u16 offset; /* first VF Routing ID offset */ + u16 stride; /* following VF stride */ + u32 pgsz; /* page size for BAR alignment */ + u8 link; /* Function Dependency Link */ + struct pci_dev *dev; /* lowest numbered PF */ + struct pci_dev *self; /* this PF */ + struct mutex lock; /* lock for VF bus */ + struct work_struct mtask; /* VF Migration task */ + u8 __iomem *mstate; /* VF Migration State Array */ +}; + +#ifdef CONFIG_PCI_IOV +extern int pci_iov_init(struct pci_dev *dev); +extern void pci_iov_release(struct pci_dev *dev); +extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type); +extern void pci_restore_iov_state(struct pci_dev *dev); +extern int pci_iov_bus_range(struct pci_bus *bus); +#else +static inline int pci_iov_init(struct pci_dev *dev) +{ + return -ENODEV; +} +static inline void pci_iov_release(struct pci_dev *dev) + +{ +} +static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type) +{ + return 0; +} +static inline void pci_restore_iov_state(struct pci_dev *dev) +{ +} +static inline int pci_iov_bus_range(struct pci_bus *bus) +{ + return 0; +} +#endif /* CONFIG_PCI_IOV */ + #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index e390707661dd..32ade5af927e 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -38,30 +38,13 @@ MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); -static int __devinit aer_probe (struct pcie_device *dev, - const struct pcie_port_service_id *id ); +static int __devinit aer_probe (struct pcie_device *dev); static void aer_remove(struct pcie_device *dev); -static int aer_suspend(struct pcie_device *dev, pm_message_t state) -{return 0;} -static int aer_resume(struct pcie_device *dev) {return 0;} static pci_ers_result_t aer_error_detected(struct pci_dev *dev, enum pci_channel_state error); static void aer_error_resume(struct pci_dev *dev); static pci_ers_result_t aer_root_reset(struct pci_dev *dev); -/* - * PCI Express bus's AER Root service driver data structure - */ -static struct pcie_port_service_id aer_id[] = { - { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .port_type = PCIE_RC_PORT, - .service_type = PCIE_PORT_SERVICE_AER, - }, - { /* end: all zeroes */ } -}; - static struct pci_error_handlers aer_error_handlers = { .error_detected = aer_error_detected, .resume = aer_error_resume, @@ -69,14 +52,12 @@ static struct pci_error_handlers aer_error_handlers = { static struct pcie_port_service_driver aerdriver = { .name = "aer", - .id_table = &aer_id[0], + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_AER, .probe = aer_probe, .remove = aer_remove, - .suspend = aer_suspend, - .resume = aer_resume, - .err_handler = &aer_error_handlers, .reset_link = aer_root_reset, @@ -207,8 +188,7 @@ static void aer_remove(struct pcie_device *dev) * * Invoked when PCI Express bus loads AER service driver. **/ -static int __devinit aer_probe (struct pcie_device *dev, - const struct pcie_port_service_id *id ) +static int __devinit aer_probe (struct pcie_device *dev) { int status; struct aer_rpc *rpc; diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c index ebce26c37049..8edb2f300e8f 100644 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -38,7 +38,7 @@ int aer_osc_setup(struct pcie_device *pciedev) handle = acpi_find_root_bridge_handle(pdev); if (handle) { - status = pci_osc_control_set(handle, + status = acpi_pci_osc_control_set(handle, OSC_PCI_EXPRESS_AER_CONTROL | OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); } diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 382575007382..307452f30035 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -351,21 +351,21 @@ static int find_aer_service_iter(struct device *device, void *data) { struct device_driver *driver; struct pcie_port_service_driver *service_driver; - struct pcie_device *pcie_dev; struct find_aer_service_data *result; result = (struct find_aer_service_data *) data; if (device->bus == &pcie_port_bus_type) { - pcie_dev = to_pcie_device(device); - if (pcie_dev->id.port_type == PCIE_SW_DOWNSTREAM_PORT) + struct pcie_port_data *port_data; + + port_data = pci_get_drvdata(to_pcie_device(device)->port); + if (port_data->port_type == PCIE_SW_DOWNSTREAM_PORT) result->is_downstream = 1; driver = device->driver; if (driver) { service_driver = to_service_driver(driver); - if (service_driver->id_table->service_type == - PCIE_PORT_SERVICE_AER) { + if (service_driver->service == PCIE_PORT_SERVICE_AER) { result->aer_driver = service_driver; return 1; } diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 2529f3f2ea5a..17ad53868f9f 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -25,19 +25,21 @@ #define PCIE_CAPABILITIES_REG 0x2 #define PCIE_SLOT_CAPABILITIES_REG 0x14 #define PCIE_PORT_DEVICE_MAXSERVICES 4 +#define PCIE_PORT_MSI_VECTOR_MASK 0x1f +/* + * According to the PCI Express Base Specification 2.0, the indices of the MSI-X + * table entires used by port services must not exceed 31 + */ +#define PCIE_PORT_MAX_MSIX_ENTRIES 32 #define get_descriptor_id(type, service) (((type - 4) << 4) | service) -struct pcie_port_device_ext { - int interrupt_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ -}; - extern struct bus_type pcie_port_bus_type; extern int pcie_port_device_probe(struct pci_dev *dev); extern int pcie_port_device_register(struct pci_dev *dev); #ifdef CONFIG_PM -extern int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state); -extern int pcie_port_device_resume(struct pci_dev *dev); +extern int pcie_port_device_suspend(struct device *dev); +extern int pcie_port_device_resume(struct device *dev); #endif extern void pcie_port_device_remove(struct pci_dev *dev); extern int __must_check pcie_port_bus_register(void); diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c index eec89b767f9f..ef3a4eeaebb4 100644 --- a/drivers/pci/pcie/portdrv_bus.c +++ b/drivers/pci/pcie/portdrv_bus.c @@ -26,20 +26,22 @@ EXPORT_SYMBOL_GPL(pcie_port_bus_type); static int pcie_port_bus_match(struct device *dev, struct device_driver *drv) { struct pcie_device *pciedev; + struct pcie_port_data *port_data; struct pcie_port_service_driver *driver; if (drv->bus != &pcie_port_bus_type || dev->bus != &pcie_port_bus_type) return 0; - + pciedev = to_pcie_device(dev); driver = to_service_driver(drv); - if ( (driver->id_table->vendor != PCI_ANY_ID && - driver->id_table->vendor != pciedev->id.vendor) || - (driver->id_table->device != PCI_ANY_ID && - driver->id_table->device != pciedev->id.device) || - (driver->id_table->port_type != PCIE_ANY_PORT && - driver->id_table->port_type != pciedev->id.port_type) || - driver->id_table->service_type != pciedev->id.service_type ) + + if (driver->service != pciedev->service) + return 0; + + port_data = pci_get_drvdata(pciedev->port); + + if (driver->port_type != PCIE_ANY_PORT + && driver->port_type != port_data->port_type) return 0; return 1; diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 8b3f8c18032f..e39982503863 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -15,10 +15,9 @@ #include <linux/slab.h> #include <linux/pcieport_if.h> +#include "../pci.h" #include "portdrv.h" -extern int pcie_mch_quirk; /* MSI-quirk Indicator */ - /** * release_pcie_device - free PCI Express port service device structure * @dev: Port service device to release @@ -31,26 +30,150 @@ static void release_pcie_device(struct device *dev) kfree(to_pcie_device(dev)); } -static int is_msi_quirked(struct pci_dev *dev) +/** + * pcie_port_msix_add_entry - add entry to given array of MSI-X entries + * @entries: Array of MSI-X entries + * @new_entry: Index of the entry to add to the array + * @nr_entries: Number of entries aleady in the array + * + * Return value: Position of the added entry in the array + */ +static int pcie_port_msix_add_entry( + struct msix_entry *entries, int new_entry, int nr_entries) { - int port_type, quirk = 0; + int j; + + for (j = 0; j < nr_entries; j++) + if (entries[j].entry == new_entry) + return j; + + entries[j].entry = new_entry; + return j; +} + +/** + * pcie_port_enable_msix - try to set up MSI-X as interrupt mode for given port + * @dev: PCI Express port to handle + * @vectors: Array of interrupt vectors to populate + * @mask: Bitmask of port capabilities returned by get_port_device_capability() + * + * Return value: 0 on success, error code on failure + */ +static int pcie_port_enable_msix(struct pci_dev *dev, int *vectors, int mask) +{ + struct msix_entry *msix_entries; + int idx[PCIE_PORT_DEVICE_MAXSERVICES]; + int nr_entries, status, pos, i, nvec; u16 reg16; + u32 reg32; - pci_read_config_word(dev, - pci_find_capability(dev, PCI_CAP_ID_EXP) + - PCIE_CAPABILITIES_REG, ®16); - port_type = (reg16 >> 4) & PORT_TYPE_MASK; - switch(port_type) { - case PCIE_RC_PORT: - if (pcie_mch_quirk == 1) - quirk = 1; - break; - case PCIE_SW_UPSTREAM_PORT: - case PCIE_SW_DOWNSTREAM_PORT: - default: - break; + nr_entries = pci_msix_table_size(dev); + if (!nr_entries) + return -EINVAL; + if (nr_entries > PCIE_PORT_MAX_MSIX_ENTRIES) + nr_entries = PCIE_PORT_MAX_MSIX_ENTRIES; + + msix_entries = kzalloc(sizeof(*msix_entries) * nr_entries, GFP_KERNEL); + if (!msix_entries) + return -ENOMEM; + + /* + * Allocate as many entries as the port wants, so that we can check + * which of them will be useful. Moreover, if nr_entries is correctly + * equal to the number of entries this port actually uses, we'll happily + * go through without any tricks. + */ + for (i = 0; i < nr_entries; i++) + msix_entries[i].entry = i; + + status = pci_enable_msix(dev, msix_entries, nr_entries); + if (status) + goto Exit; + + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + idx[i] = -1; + status = -EIO; + nvec = 0; + + if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) { + int entry; + + /* + * The code below follows the PCI Express Base Specification 2.0 + * stating in Section 6.1.6 that "PME and Hot-Plug Event + * interrupts (when both are implemented) always share the same + * MSI or MSI-X vector, as indicated by the Interrupt Message + * Number field in the PCI Express Capabilities register", where + * according to Section 7.8.2 of the specification "For MSI-X, + * the value in this field indicates which MSI-X Table entry is + * used to generate the interrupt message." + */ + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + pci_read_config_word(dev, pos + PCIE_CAPABILITIES_REG, ®16); + entry = (reg16 >> 9) & PCIE_PORT_MSI_VECTOR_MASK; + if (entry >= nr_entries) + goto Error; + + i = pcie_port_msix_add_entry(msix_entries, entry, nvec); + if (i == nvec) + nvec++; + + idx[PCIE_PORT_SERVICE_PME_SHIFT] = i; + idx[PCIE_PORT_SERVICE_HP_SHIFT] = i; + } + + if (mask & PCIE_PORT_SERVICE_AER) { + int entry; + + /* + * The code below follows Section 7.10.10 of the PCI Express + * Base Specification 2.0 stating that bits 31-27 of the Root + * Error Status Register contain a value indicating which of the + * MSI/MSI-X vectors assigned to the port is going to be used + * for AER, where "For MSI-X, the value in this register + * indicates which MSI-X Table entry is used to generate the + * interrupt message." + */ + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, ®32); + entry = reg32 >> 27; + if (entry >= nr_entries) + goto Error; + + i = pcie_port_msix_add_entry(msix_entries, entry, nvec); + if (i == nvec) + nvec++; + + idx[PCIE_PORT_SERVICE_AER_SHIFT] = i; } - return quirk; + + /* + * If nvec is equal to the allocated number of entries, we can just use + * what we have. Otherwise, the port has some extra entries not for the + * services we know and we need to work around that. + */ + if (nvec == nr_entries) { + status = 0; + } else { + /* Drop the temporary MSI-X setup */ + pci_disable_msix(dev); + + /* Now allocate the MSI-X vectors for real */ + status = pci_enable_msix(dev, msix_entries, nvec); + if (status) + goto Exit; + } + + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + vectors[i] = idx[i] >= 0 ? msix_entries[idx[i]].vector : -1; + + Exit: + kfree(msix_entries); + return status; + + Error: + pci_disable_msix(dev); + goto Exit; } /** @@ -64,47 +187,32 @@ static int is_msi_quirked(struct pci_dev *dev) */ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) { - int i, pos, nvec, status = -EINVAL; - int interrupt_mode = PCIE_PORT_INTx_MODE; + struct pcie_port_data *port_data = pci_get_drvdata(dev); + int irq, interrupt_mode = PCIE_PORT_NO_IRQ; + int i; - /* Set INTx as default */ - for (i = 0, nvec = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { - if (mask & (1 << i)) - nvec++; - vectors[i] = dev->irq; - } - /* Check MSI quirk */ - if (is_msi_quirked(dev)) - return interrupt_mode; - - /* Select MSI-X over MSI if supported */ - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); - if (pos) { - struct msix_entry msix_entries[PCIE_PORT_DEVICE_MAXSERVICES] = - {{0, 0}, {0, 1}, {0, 2}, {0, 3}}; - status = pci_enable_msix(dev, msix_entries, nvec); - if (!status) { - int j = 0; - - interrupt_mode = PCIE_PORT_MSIX_MODE; - for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { - if (mask & (1 << i)) - vectors[i] = msix_entries[j++].vector; - } - } - } - if (status) { - pos = pci_find_capability(dev, PCI_CAP_ID_MSI); - if (pos) { - status = pci_enable_msi(dev); - if (!status) { - interrupt_mode = PCIE_PORT_MSI_MODE; - for (i = 0;i < PCIE_PORT_DEVICE_MAXSERVICES;i++) - vectors[i] = dev->irq; - } - } - } + if (port_data->port_type == PCIE_RC_PORT && pcie_mch_quirk) + goto Fallback; + + /* Try to use MSI-X if supported */ + if (!pcie_port_enable_msix(dev, vectors, mask)) + return PCIE_PORT_MSIX_MODE; + + /* We're not going to use MSI-X, so try MSI and fall back to INTx */ + if (!pci_enable_msi(dev)) + interrupt_mode = PCIE_PORT_MSI_MODE; + + Fallback: + if (interrupt_mode == PCIE_PORT_NO_IRQ && dev->pin) + interrupt_mode = PCIE_PORT_INTx_MODE; + + irq = interrupt_mode != PCIE_PORT_NO_IRQ ? dev->irq : -1; + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + vectors[i] = irq; + + vectors[PCIE_PORT_SERVICE_VC_SHIFT] = -1; + return interrupt_mode; } @@ -132,13 +240,11 @@ static int get_port_device_capability(struct pci_dev *dev) pos + PCIE_SLOT_CAPABILITIES_REG, ®32); if (reg32 & SLOT_HP_CAPABLE_MASK) services |= PCIE_PORT_SERVICE_HP; - } - /* PME Capable - root port capability */ - if (((reg16 >> 4) & PORT_TYPE_MASK) == PCIE_RC_PORT) - services |= PCIE_PORT_SERVICE_PME; - + } + /* AER capable */ if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR)) services |= PCIE_PORT_SERVICE_AER; + /* VC support */ if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC)) services |= PCIE_PORT_SERVICE_VC; @@ -152,20 +258,17 @@ static int get_port_device_capability(struct pci_dev *dev) * @port_type: Type of the port * @service_type: Type of service to associate with the service device * @irq: Interrupt vector to associate with the service device - * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI) */ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, - int port_type, int service_type, int irq, int irq_mode) + int service_type, int irq) { + struct pcie_port_data *port_data = pci_get_drvdata(parent); struct device *device; + int port_type = port_data->port_type; dev->port = parent; - dev->interrupt_mode = irq_mode; dev->irq = irq; - dev->id.vendor = parent->vendor; - dev->id.device = parent->device; - dev->id.port_type = port_type; - dev->id.service_type = (1 << service_type); + dev->service = service_type; /* Initialize generic device interface */ device = &dev->device; @@ -185,10 +288,9 @@ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, * @port_type: Type of the port * @service_type: Type of service to associate with the service device * @irq: Interrupt vector to associate with the service device - * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI) */ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent, - int port_type, int service_type, int irq, int irq_mode) + int service_type, int irq) { struct pcie_device *device; @@ -196,7 +298,7 @@ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent, if (!device) return NULL; - pcie_device_init(parent, device, port_type, service_type, irq,irq_mode); + pcie_device_init(parent, device, service_type, irq); return device; } @@ -230,63 +332,90 @@ int pcie_port_device_probe(struct pci_dev *dev) */ int pcie_port_device_register(struct pci_dev *dev) { - struct pcie_port_device_ext *p_ext; - int status, type, capabilities, irq_mode, i; + struct pcie_port_data *port_data; + int status, capabilities, irq_mode, i, nr_serv; int vectors[PCIE_PORT_DEVICE_MAXSERVICES]; u16 reg16; - /* Allocate port device extension */ - if (!(p_ext = kmalloc(sizeof(struct pcie_port_device_ext), GFP_KERNEL))) + port_data = kzalloc(sizeof(*port_data), GFP_KERNEL); + if (!port_data) return -ENOMEM; - - pci_set_drvdata(dev, p_ext); + pci_set_drvdata(dev, port_data); /* Get port type */ pci_read_config_word(dev, pci_find_capability(dev, PCI_CAP_ID_EXP) + PCIE_CAPABILITIES_REG, ®16); - type = (reg16 >> 4) & PORT_TYPE_MASK; + port_data->port_type = (reg16 >> 4) & PORT_TYPE_MASK; - /* Now get port services */ capabilities = get_port_device_capability(dev); + /* Root ports are capable of generating PME too */ + if (port_data->port_type == PCIE_RC_PORT) + capabilities |= PCIE_PORT_SERVICE_PME; + irq_mode = assign_interrupt_mode(dev, vectors, capabilities); - p_ext->interrupt_mode = irq_mode; + if (irq_mode == PCIE_PORT_NO_IRQ) { + /* + * Don't use service devices that require interrupts if there is + * no way to generate them. + */ + if (!(capabilities & PCIE_PORT_SERVICE_VC)) { + status = -ENODEV; + goto Error; + } + capabilities = PCIE_PORT_SERVICE_VC; + } + port_data->port_irq_mode = irq_mode; + + status = pci_enable_device(dev); + if (status) + goto Error; + pci_set_master(dev); /* Allocate child services if any */ - for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { + for (i = 0, nr_serv = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) { struct pcie_device *child; + int service = 1 << i; + + if (!(capabilities & service)) + continue; - if (capabilities & (1 << i)) { - child = alloc_pcie_device( - dev, /* parent */ - type, /* port type */ - i, /* service type */ - vectors[i], /* irq */ - irq_mode /* interrupt mode */); - if (child) { - status = device_register(&child->device); - if (status) { - kfree(child); - continue; - } - get_device(&child->device); - } + child = alloc_pcie_device(dev, service, vectors[i]); + if (!child) + continue; + + status = device_register(&child->device); + if (status) { + kfree(child); + continue; } + + get_device(&child->device); + nr_serv++; + } + if (!nr_serv) { + pci_disable_device(dev); + status = -ENODEV; + goto Error; } + return 0; + + Error: + kfree(port_data); + return status; } #ifdef CONFIG_PM static int suspend_iter(struct device *dev, void *data) { struct pcie_port_service_driver *service_driver; - pm_message_t state = * (pm_message_t *) data; if ((dev->bus == &pcie_port_bus_type) && (dev->driver)) { service_driver = to_service_driver(dev->driver); if (service_driver->suspend) - service_driver->suspend(to_pcie_device(dev), state); + service_driver->suspend(to_pcie_device(dev)); } return 0; } @@ -294,11 +423,10 @@ static int suspend_iter(struct device *dev, void *data) /** * pcie_port_device_suspend - suspend port services associated with a PCIe port * @dev: PCI Express port to handle - * @state: Representation of system power management transition in progress */ -int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state) +int pcie_port_device_suspend(struct device *dev) { - return device_for_each_child(&dev->dev, &state, suspend_iter); + return device_for_each_child(dev, NULL, suspend_iter); } static int resume_iter(struct device *dev, void *data) @@ -318,24 +446,17 @@ static int resume_iter(struct device *dev, void *data) * pcie_port_device_suspend - resume port services associated with a PCIe port * @dev: PCI Express port to handle */ -int pcie_port_device_resume(struct pci_dev *dev) +int pcie_port_device_resume(struct device *dev) { - return device_for_each_child(&dev->dev, NULL, resume_iter); + return device_for_each_child(dev, NULL, resume_iter); } -#endif +#endif /* PM */ static int remove_iter(struct device *dev, void *data) { - struct pcie_port_service_driver *service_driver; - if (dev->bus == &pcie_port_bus_type) { - if (dev->driver) { - service_driver = to_service_driver(dev->driver); - if (service_driver->remove) - service_driver->remove(to_pcie_device(dev)); - } - *(unsigned long*)data = (unsigned long)dev; - return 1; + put_device(dev); + device_unregister(dev); } return 0; } @@ -349,25 +470,21 @@ static int remove_iter(struct device *dev, void *data) */ void pcie_port_device_remove(struct pci_dev *dev) { - struct device *device; - unsigned long device_addr; - int interrupt_mode = PCIE_PORT_INTx_MODE; - int status; + struct pcie_port_data *port_data = pci_get_drvdata(dev); - do { - status = device_for_each_child(&dev->dev, &device_addr, remove_iter); - if (status) { - device = (struct device*)device_addr; - interrupt_mode = (to_pcie_device(device))->interrupt_mode; - put_device(device); - device_unregister(device); - } - } while (status); - /* Switch to INTx by default if MSI enabled */ - if (interrupt_mode == PCIE_PORT_MSIX_MODE) + device_for_each_child(&dev->dev, NULL, remove_iter); + pci_disable_device(dev); + + switch (port_data->port_irq_mode) { + case PCIE_PORT_MSIX_MODE: pci_disable_msix(dev); - else if (interrupt_mode == PCIE_PORT_MSI_MODE) + break; + case PCIE_PORT_MSI_MODE: pci_disable_msi(dev); + break; + } + + kfree(port_data); } /** @@ -392,7 +509,7 @@ static int pcie_port_probe_service(struct device *dev) return -ENODEV; pciedev = to_pcie_device(dev); - status = driver->probe(pciedev, driver->id_table); + status = driver->probe(pciedev); if (!status) { dev_printk(KERN_DEBUG, dev, "service driver %s loaded\n", driver->name); diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 5ea566e20b37..b924e2463f85 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -32,11 +32,6 @@ MODULE_LICENSE("GPL"); /* global data */ static const char device_name[] = "pcieport-driver"; -static int pcie_portdrv_save_config(struct pci_dev *dev) -{ - return pci_save_state(dev); -} - static int pcie_portdrv_restore_config(struct pci_dev *dev) { int retval; @@ -49,21 +44,21 @@ static int pcie_portdrv_restore_config(struct pci_dev *dev) } #ifdef CONFIG_PM -static int pcie_portdrv_suspend(struct pci_dev *dev, pm_message_t state) -{ - return pcie_port_device_suspend(dev, state); +static struct dev_pm_ops pcie_portdrv_pm_ops = { + .suspend = pcie_port_device_suspend, + .resume = pcie_port_device_resume, + .freeze = pcie_port_device_suspend, + .thaw = pcie_port_device_resume, + .poweroff = pcie_port_device_suspend, + .restore = pcie_port_device_resume, +}; -} +#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops) -static int pcie_portdrv_resume(struct pci_dev *dev) -{ - pci_set_master(dev); - return pcie_port_device_resume(dev); -} -#else -#define pcie_portdrv_suspend NULL -#define pcie_portdrv_resume NULL -#endif +#else /* !PM */ + +#define PCIE_PORTDRV_PM_OPS NULL +#endif /* !PM */ /* * pcie_portdrv_probe - Probe PCI-Express port devices @@ -82,20 +77,15 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev, if (status) return status; - if (pci_enable_device(dev) < 0) - return -ENODEV; - - pci_set_master(dev); if (!dev->irq && dev->pin) { dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; " "check vendor BIOS\n", dev->vendor, dev->device); } - if (pcie_port_device_register(dev)) { - pci_disable_device(dev); - return -ENOMEM; - } + status = pcie_port_device_register(dev); + if (status) + return status; - pcie_portdrv_save_config(dev); + pci_save_state(dev); return 0; } @@ -104,7 +94,6 @@ static void pcie_portdrv_remove (struct pci_dev *dev) { pcie_port_device_remove(dev); pci_disable_device(dev); - kfree(pci_get_drvdata(dev)); } static int error_detected_iter(struct device *device, void *data) @@ -278,10 +267,9 @@ static struct pci_driver pcie_portdriver = { .probe = pcie_portdrv_probe, .remove = pcie_portdrv_remove, - .suspend = pcie_portdrv_suspend, - .resume = pcie_portdrv_resume, - .err_handler = &pcie_portdrv_err_handler, + + .driver.pm = PCIE_PORTDRV_PM_OPS, }; static int __init pcie_portdrv_init(void) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 55ec44a27e89..e2f3dd098cfa 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -287,7 +287,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) struct resource *res; int i; - if (!dev) /* It's a host bus, nothing to read */ + if (!child->parent) /* It's a host bus, nothing to read */ return; if (dev->transparent) { @@ -511,21 +511,21 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, /* * If we already got to this bus through a different bridge, - * ignore it. This can happen with the i450NX chipset. + * don't re-add it. This can happen with the i450NX chipset. + * + * However, we continue to descend down the hierarchy and + * scan remaining child buses. */ - if (pci_find_bus(pci_domain_nr(bus), busnr)) { - dev_info(&dev->dev, "bus %04x:%02x already known\n", - pci_domain_nr(bus), busnr); - goto out; + child = pci_find_bus(pci_domain_nr(bus), busnr); + if (!child) { + child = pci_add_new_bus(bus, dev, busnr); + if (!child) + goto out; + child->primary = buses & 0xFF; + child->subordinate = (buses >> 16) & 0xFF; + child->bridge_ctl = bctl; } - child = pci_add_new_bus(bus, dev, busnr); - if (!child) - goto out; - child->primary = buses & 0xFF; - child->subordinate = (buses >> 16) & 0xFF; - child->bridge_ctl = bctl; - cmax = pci_scan_child_bus(child); if (cmax > max) max = cmax; @@ -674,6 +674,19 @@ static void pci_read_irq(struct pci_dev *dev) dev->irq = irq; } +static void set_pcie_port_type(struct pci_dev *pdev) +{ + int pos; + u16 reg16; + + pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (!pos) + return; + pdev->is_pcie = 1; + pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); + pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; +} + #define LEGACY_IO_RESOURCE (IORESOURCE_IO | IORESOURCE_PCI_FIXED) /** @@ -683,12 +696,33 @@ static void pci_read_irq(struct pci_dev *dev) * Initialize the device structure with information about the device's * vendor,class,memory and IO-space addresses,IRQ lines etc. * Called at initialisation of the PCI subsystem and by CardBus services. - * Returns 0 on success and -1 if unknown type of device (not normal, bridge - * or CardBus). + * Returns 0 on success and negative if unknown type of device (not normal, + * bridge or CardBus). */ -static int pci_setup_device(struct pci_dev * dev) +int pci_setup_device(struct pci_dev *dev) { u32 class; + u8 hdr_type; + struct pci_slot *slot; + + if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type)) + return -EIO; + + dev->sysdata = dev->bus->sysdata; + dev->dev.parent = dev->bus->bridge; + dev->dev.bus = &pci_bus_type; + dev->hdr_type = hdr_type & 0x7f; + dev->multifunction = !!(hdr_type & 0x80); + dev->error_state = pci_channel_io_normal; + set_pcie_port_type(dev); + + list_for_each_entry(slot, &dev->bus->slots, list) + if (PCI_SLOT(dev->devfn) == slot->number) + dev->slot = slot; + + /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) + set this higher, assuming the system even supports it. */ + dev->dma_mask = 0xffffffff; dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus), dev->bus->number, PCI_SLOT(dev->devfn), @@ -703,12 +737,14 @@ static int pci_setup_device(struct pci_dev * dev) dev_dbg(&dev->dev, "found [%04x:%04x] class %06x header type %02x\n", dev->vendor, dev->device, class, dev->hdr_type); + /* need to have dev->class ready */ + dev->cfg_size = pci_cfg_space_size(dev); + /* "Unknown power state" */ dev->current_state = PCI_UNKNOWN; /* Early fixups, before probing the BARs */ pci_fixup_device(pci_fixup_early, dev); - class = dev->class >> 8; switch (dev->hdr_type) { /* header type */ case PCI_HEADER_TYPE_NORMAL: /* standard header */ @@ -770,7 +806,7 @@ static int pci_setup_device(struct pci_dev * dev) default: /* unknown header */ dev_err(&dev->dev, "unknown header type %02x, " "ignoring device\n", dev->hdr_type); - return -1; + return -EIO; bad: dev_err(&dev->dev, "ignoring class %02x (doesn't match header " @@ -785,6 +821,7 @@ static int pci_setup_device(struct pci_dev * dev) static void pci_release_capabilities(struct pci_dev *dev) { pci_vpd_release(dev); + pci_iov_release(dev); } /** @@ -803,19 +840,6 @@ static void pci_release_dev(struct device *dev) kfree(pci_dev); } -static void set_pcie_port_type(struct pci_dev *pdev) -{ - int pos; - u16 reg16; - - pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); - if (!pos) - return; - pdev->is_pcie = 1; - pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); - pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; -} - /** * pci_cfg_space_size - get the configuration space size of the PCI device. * @dev: PCI device @@ -847,6 +871,11 @@ int pci_cfg_space_size(struct pci_dev *dev) { int pos; u32 status; + u16 class; + + class = dev->class >> 8; + if (class == PCI_CLASS_BRIDGE_HOST) + return pci_cfg_space_size_ext(dev); pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) { @@ -891,9 +920,7 @@ EXPORT_SYMBOL(alloc_pci_dev); static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) { struct pci_dev *dev; - struct pci_slot *slot; u32 l; - u8 hdr_type; int delay = 1; if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l)) @@ -920,34 +947,16 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) } } - if (pci_bus_read_config_byte(bus, devfn, PCI_HEADER_TYPE, &hdr_type)) - return NULL; - dev = alloc_pci_dev(); if (!dev) return NULL; dev->bus = bus; - dev->sysdata = bus->sysdata; - dev->dev.parent = bus->bridge; - dev->dev.bus = &pci_bus_type; dev->devfn = devfn; - dev->hdr_type = hdr_type & 0x7f; - dev->multifunction = !!(hdr_type & 0x80); dev->vendor = l & 0xffff; dev->device = (l >> 16) & 0xffff; - dev->cfg_size = pci_cfg_space_size(dev); - dev->error_state = pci_channel_io_normal; - set_pcie_port_type(dev); - - list_for_each_entry(slot, &bus->slots, list) - if (PCI_SLOT(devfn) == slot->number) - dev->slot = slot; - /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) - set this higher, assuming the system even supports it. */ - dev->dma_mask = 0xffffffff; - if (pci_setup_device(dev) < 0) { + if (pci_setup_device(dev)) { kfree(dev); return NULL; } @@ -972,6 +981,9 @@ static void pci_init_capabilities(struct pci_dev *dev) /* Alternative Routing-ID Forwarding */ pci_enable_ari(dev); + + /* Single Root I/O Virtualization */ + pci_iov_init(dev); } void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) @@ -1006,6 +1018,12 @@ struct pci_dev *__ref pci_scan_single_device(struct pci_bus *bus, int devfn) { struct pci_dev *dev; + dev = pci_get_slot(bus, devfn); + if (dev) { + pci_dev_put(dev); + return dev; + } + dev = pci_scan_device(bus, devfn); if (!dev) return NULL; @@ -1024,35 +1042,27 @@ EXPORT_SYMBOL(pci_scan_single_device); * Scan a PCI slot on the specified PCI bus for devices, adding * discovered devices to the @bus->devices list. New devices * will not have is_added set. + * + * Returns the number of new devices found. */ int pci_scan_slot(struct pci_bus *bus, int devfn) { - int func, nr = 0; - int scan_all_fns; - - scan_all_fns = pcibios_scan_all_fns(bus, devfn); - - for (func = 0; func < 8; func++, devfn++) { - struct pci_dev *dev; - - dev = pci_scan_single_device(bus, devfn); - if (dev) { - nr++; + int fn, nr = 0; + struct pci_dev *dev; - /* - * If this is a single function device, - * don't scan past the first function. - */ - if (!dev->multifunction) { - if (func > 0) { - dev->multifunction = 1; - } else { - break; - } + dev = pci_scan_single_device(bus, devfn); + if (dev && !dev->is_added) /* new device? */ + nr++; + + if ((dev && dev->multifunction) || + (!dev && pcibios_scan_all_fns(bus, devfn))) { + for (fn = 1; fn < 8; fn++) { + dev = pci_scan_single_device(bus, devfn + fn); + if (dev) { + if (!dev->is_added) + nr++; + dev->multifunction = 1; } - } else { - if (func == 0 && !scan_all_fns) - break; } } @@ -1074,12 +1084,21 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) for (devfn = 0; devfn < 0x100; devfn += 8) pci_scan_slot(bus, devfn); + /* Reserve buses for SR-IOV capability. */ + max += pci_iov_bus_range(bus); + /* * After performing arch-dependent fixup of the bus, look behind * all PCI-to-PCI bridges on this bus. */ - pr_debug("PCI: Fixups for bus %04x:%02x\n", pci_domain_nr(bus), bus->number); - pcibios_fixup_bus(bus); + if (!bus->is_added) { + pr_debug("PCI: Fixups for bus %04x:%02x\n", + pci_domain_nr(bus), bus->number); + pcibios_fixup_bus(bus); + if (pci_is_root_bus(bus)) + bus->is_added = 1; + } + for (pass=0; pass < 2; pass++) list_for_each_entry(dev, &bus->devices, bus_list) { if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || @@ -1114,7 +1133,7 @@ struct pci_bus * pci_create_bus(struct device *parent, if (!b) return NULL; - dev = kmalloc(sizeof(*dev), GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev){ kfree(b); return NULL; @@ -1133,7 +1152,6 @@ struct pci_bus * pci_create_bus(struct device *parent, list_add_tail(&b->node, &pci_root_buses); up_write(&pci_bus_sem); - memset(dev, 0, sizeof(*dev)); dev->parent = parent; dev->release = pci_release_bus_bridge_dev; dev_set_name(dev, "pci%04x:%02x", pci_domain_nr(b), bus); @@ -1193,6 +1211,38 @@ struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent, EXPORT_SYMBOL(pci_scan_bus_parented); #ifdef CONFIG_HOTPLUG +/** + * pci_rescan_bus - scan a PCI bus for devices. + * @bus: PCI bus to scan + * + * Scan a PCI bus and child buses for new devices, adds them, + * and enables them. + * + * Returns the max number of subordinate bus discovered. + */ +unsigned int __devinit pci_rescan_bus(struct pci_bus *bus) +{ + unsigned int max; + struct pci_dev *dev; + + max = pci_scan_child_bus(bus); + + down_read(&pci_bus_sem); + list_for_each_entry(dev, &bus->devices, bus_list) + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || + dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + if (dev->subordinate) + pci_bus_size_bridges(dev->subordinate); + up_read(&pci_bus_sem); + + pci_bus_assign_resources(bus); + pci_enable_bridges(bus); + pci_bus_add_devices(bus); + + return max; +} +EXPORT_SYMBOL_GPL(pci_rescan_bus); + EXPORT_SYMBOL(pci_add_new_bus); EXPORT_SYMBOL(pci_scan_slot); EXPORT_SYMBOL(pci_scan_bridge); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 92b9efe9bcaf..9b2f0d96900d 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -24,6 +24,7 @@ #include <linux/kallsyms.h> #include <linux/dmi.h> #include <linux/pci-aspm.h> +#include <linux/ioport.h> #include "pci.h" int isa_dma_bridge_buggy; @@ -34,6 +35,65 @@ int pcie_mch_quirk; EXPORT_SYMBOL(pcie_mch_quirk); #ifdef CONFIG_PCI_QUIRKS +/* + * This quirk function disables the device and releases resources + * which is specified by kernel's boot parameter 'pci=resource_alignment='. + * It also rounds up size to specified alignment. + * Later on, the kernel will assign page-aligned memory resource back + * to that device. + */ +static void __devinit quirk_resource_alignment(struct pci_dev *dev) +{ + int i; + struct resource *r; + resource_size_t align, size; + + if (!pci_is_reassigndev(dev)) + return; + + if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL && + (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) { + dev_warn(&dev->dev, + "Can't reassign resources to host bridge.\n"); + return; + } + + dev_info(&dev->dev, "Disabling device and release resources.\n"); + pci_disable_device(dev); + + align = pci_specified_resource_alignment(dev); + for (i=0; i < PCI_BRIDGE_RESOURCES; i++) { + r = &dev->resource[i]; + if (!(r->flags & IORESOURCE_MEM)) + continue; + size = resource_size(r); + if (size < align) { + size = align; + dev_info(&dev->dev, + "Rounding up size of resource #%d to %#llx.\n", + i, (unsigned long long)size); + } + r->end = size - 1; + r->start = 0; + } + /* Need to disable bridge's resource window, + * to enable the kernel to reassign new resource + * window later on. + */ + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) { + r = &dev->resource[i]; + if (!(r->flags & IORESOURCE_MEM)) + continue; + r->end = resource_size(r) - 1; + r->start = 0; + } + pci_disable_bridge_window(dev); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_resource_alignment); + /* The Mellanox Tavor device gives false positive parity errors * Mark this device with a broken_parity_status, to allow * PCI scanning code to "skip" this now blacklisted device. @@ -1126,10 +1186,15 @@ static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev) * its on-board VGA controller */ asus_hides_smbus = 1; } - else if (dev->device == PCI_DEVICE_ID_INTEL_82845G_IG) + else if (dev->device == PCI_DEVICE_ID_INTEL_82801DB_2) switch(dev->subsystem_device) { case 0x00b8: /* Compaq Evo D510 CMT */ case 0x00b9: /* Compaq Evo D510 SFF */ + /* Motherboard doesn't have Host bridge + * subvendor/subdevice IDs and on-board VGA + * controller is disabled if an AGP card is + * inserted, therefore checking USB UHCI + * Controller #1 */ asus_hides_smbus = 1; } else if (dev->device == PCI_DEVICE_ID_INTEL_82815_CGC) @@ -1154,7 +1219,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82855GM_HB, as DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82915GM_HB, asus_hides_smbus_hostbridge); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810_IG3, asus_hides_smbus_hostbridge); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845G_IG, asus_hides_smbus_hostbridge); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_2, asus_hides_smbus_hostbridge); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC, asus_hides_smbus_hostbridge); static void asus_hides_smbus_lpc(struct pci_dev *dev) @@ -1664,9 +1729,13 @@ static void __devinit quirk_netmos(struct pci_dev *dev) * of parallel ports and <S> is the number of serial ports. */ switch (dev->device) { + case PCI_DEVICE_ID_NETMOS_9835: + /* Well, this rule doesn't hold for the following 9835 device */ + if (dev->subsystem_vendor == PCI_VENDOR_ID_IBM && + dev->subsystem_device == 0x0299) + return; case PCI_DEVICE_ID_NETMOS_9735: case PCI_DEVICE_ID_NETMOS_9745: - case PCI_DEVICE_ID_NETMOS_9835: case PCI_DEVICE_ID_NETMOS_9845: case PCI_DEVICE_ID_NETMOS_9855: if ((dev->class >> 8) == PCI_CLASS_COMMUNICATION_SERIAL && @@ -2078,6 +2147,92 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15, nvenet_msi_disable); +static int __devinit ht_check_msi_mapping(struct pci_dev *dev) +{ + int pos, ttl = 48; + int found = 0; + + /* check if there is HT MSI cap or enabled on this device */ + pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING); + while (pos && ttl--) { + u8 flags; + + if (found < 1) + found = 1; + if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS, + &flags) == 0) { + if (flags & HT_MSI_FLAGS_ENABLE) { + if (found < 2) { + found = 2; + break; + } + } + } + pos = pci_find_next_ht_capability(dev, pos, + HT_CAPTYPE_MSI_MAPPING); + } + + return found; +} + +static int __devinit host_bridge_with_leaf(struct pci_dev *host_bridge) +{ + struct pci_dev *dev; + int pos; + int i, dev_no; + int found = 0; + + dev_no = host_bridge->devfn >> 3; + for (i = dev_no + 1; i < 0x20; i++) { + dev = pci_get_slot(host_bridge->bus, PCI_DEVFN(i, 0)); + if (!dev) + continue; + + /* found next host bridge ?*/ + pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE); + if (pos != 0) { + pci_dev_put(dev); + break; + } + + if (ht_check_msi_mapping(dev)) { + found = 1; + pci_dev_put(dev); + break; + } + pci_dev_put(dev); + } + + return found; +} + +#define PCI_HT_CAP_SLAVE_CTRL0 4 /* link control */ +#define PCI_HT_CAP_SLAVE_CTRL1 8 /* link control to */ + +static int __devinit is_end_of_ht_chain(struct pci_dev *dev) +{ + int pos, ctrl_off; + int end = 0; + u16 flags, ctrl; + + pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE); + + if (!pos) + goto out; + + pci_read_config_word(dev, pos + PCI_CAP_FLAGS, &flags); + + ctrl_off = ((flags >> 10) & 1) ? + PCI_HT_CAP_SLAVE_CTRL0 : PCI_HT_CAP_SLAVE_CTRL1; + pci_read_config_word(dev, pos + ctrl_off, &ctrl); + + if (ctrl & (1 << 6)) + end = 1; + +out: + return end; +} + static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev) { struct pci_dev *host_bridge; @@ -2102,6 +2257,11 @@ static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev) if (!found) return; + /* don't enable end_device/host_bridge with leaf directly here */ + if (host_bridge == dev && is_end_of_ht_chain(host_bridge) && + host_bridge_with_leaf(host_bridge)) + goto out; + /* root did that ! */ if (msi_ht_cap_enabled(host_bridge)) goto out; @@ -2132,44 +2292,12 @@ static void __devinit ht_disable_msi_mapping(struct pci_dev *dev) } } -static int __devinit ht_check_msi_mapping(struct pci_dev *dev) -{ - int pos, ttl = 48; - int found = 0; - - /* check if there is HT MSI cap or enabled on this device */ - pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING); - while (pos && ttl--) { - u8 flags; - - if (found < 1) - found = 1; - if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS, - &flags) == 0) { - if (flags & HT_MSI_FLAGS_ENABLE) { - if (found < 2) { - found = 2; - break; - } - } - } - pos = pci_find_next_ht_capability(dev, pos, - HT_CAPTYPE_MSI_MAPPING); - } - - return found; -} - -static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev) +static void __devinit __nv_msi_ht_cap_quirk(struct pci_dev *dev, int all) { struct pci_dev *host_bridge; int pos; int found; - /* Enabling HT MSI mapping on this device breaks MCP51 */ - if (dev->device == 0x270) - return; - /* check if there is HT MSI cap or enabled on this device */ found = ht_check_msi_mapping(dev); @@ -2193,7 +2321,10 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev) /* Host bridge is to HT */ if (found == 1) { /* it is not enabled, try to enable it */ - nv_ht_enable_msi_mapping(dev); + if (all) + ht_enable_msi_mapping(dev); + else + nv_ht_enable_msi_mapping(dev); } return; } @@ -2205,8 +2336,20 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev) /* Host bridge is not to HT, disable HT MSI mapping on this device */ ht_disable_msi_mapping(dev); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk); + +static void __devinit nv_msi_ht_cap_quirk_all(struct pci_dev *dev) +{ + return __nv_msi_ht_cap_quirk(dev, 1); +} + +static void __devinit nv_msi_ht_cap_quirk_leaf(struct pci_dev *dev) +{ + return __nv_msi_ht_cap_quirk(dev, 0); +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf); + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all); static void __devinit quirk_msi_intx_disable_bug(struct pci_dev *dev) { diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 042e08924421..86503c14ce7e 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -71,6 +71,9 @@ void pci_remove_bus(struct pci_bus *pci_bus) down_write(&pci_bus_sem); list_del(&pci_bus->node); up_write(&pci_bus_sem); + if (!pci_bus->is_added) + return; + pci_remove_legacy_files(pci_bus); device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity); device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity); @@ -92,6 +95,7 @@ EXPORT_SYMBOL(pci_remove_bus); */ void pci_remove_bus_device(struct pci_dev *dev) { + pci_stop_bus_device(dev); if (dev->subordinate) { struct pci_bus *b = dev->subordinate; diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 5af8bd538149..710d4ea69568 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -29,7 +29,7 @@ pci_find_upstream_pcie_bridge(struct pci_dev *pdev) if (pdev->is_pcie) return NULL; while (1) { - if (!pdev->bus->self) + if (!pdev->bus->parent) break; pdev = pdev->bus->self; /* a p2p bridge */ diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 704608945780..334285a8e237 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -27,7 +27,7 @@ #include <linux/slab.h> -static void pbus_assign_resources_sorted(struct pci_bus *bus) +static void pbus_assign_resources_sorted(const struct pci_bus *bus) { struct pci_dev *dev; struct resource *res; @@ -144,6 +144,9 @@ static void pci_setup_bridge(struct pci_bus *bus) struct pci_bus_region region; u32 l, bu, lu, io_upper16; + if (!pci_is_root_bus(bus) && bus->is_added) + return; + dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n", pci_domain_nr(bus), bus->number); @@ -495,7 +498,7 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus) } EXPORT_SYMBOL(pci_bus_size_bridges); -void __ref pci_bus_assign_resources(struct pci_bus *bus) +void __ref pci_bus_assign_resources(const struct pci_bus *bus) { struct pci_bus *b; struct pci_dev *dev; diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 32e8d88a4619..3039fcb86afc 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -120,6 +120,21 @@ int pci_claim_resource(struct pci_dev *dev, int resource) return err; } +#ifdef CONFIG_PCI_QUIRKS +void pci_disable_bridge_window(struct pci_dev *dev) +{ + dev_dbg(&dev->dev, "Disabling bridge window.\n"); + + /* MMIO Base/Limit */ + pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0); + + /* Prefetchable MMIO Base/Limit */ + pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0); + pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0); + pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff); +} +#endif /* CONFIG_PCI_QUIRKS */ + int pci_assign_resource(struct pci_dev *dev, int resno) { struct pci_bus *bus = dev->bus; diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 5a8ccb4f604d..21189447e545 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -1,8 +1,8 @@ /* * drivers/pci/slot.c * Copyright (C) 2006 Matthew Wilcox <matthew@wil.cx> - * Copyright (C) 2006-2008 Hewlett-Packard Development Company, L.P. - * Alex Chiang <achiang@hp.com> + * Copyright (C) 2006-2009 Hewlett-Packard Development Company, L.P. + * Alex Chiang <achiang@hp.com> */ #include <linux/kobject.h> @@ -52,8 +52,8 @@ static void pci_slot_release(struct kobject *kobj) struct pci_dev *dev; struct pci_slot *slot = to_pci_slot(kobj); - pr_debug("%s: releasing pci_slot on %x:%d\n", __func__, - slot->bus->number, slot->number); + dev_dbg(&slot->bus->dev, "dev %02x, released physical slot %s\n", + slot->number, pci_slot_name(slot)); list_for_each_entry(dev, &slot->bus->devices, bus_list) if (PCI_SLOT(dev->devfn) == slot->number) @@ -248,9 +248,8 @@ placeholder: if (PCI_SLOT(dev->devfn) == slot_nr) dev->slot = slot; - /* Don't care if debug printk has a -1 for slot_nr */ - pr_debug("%s: created pci_slot on %04x:%02x:%02x\n", - __func__, pci_domain_nr(parent), parent->number, slot_nr); + dev_dbg(&parent->dev, "dev %02x, created physical slot %s\n", + slot_nr, pci_slot_name(slot)); out: kfree(slot_name); @@ -299,9 +298,8 @@ EXPORT_SYMBOL_GPL(pci_renumber_slot); */ void pci_destroy_slot(struct pci_slot *slot) { - pr_debug("%s: dec refcount to %d on %04x:%02x:%02x\n", __func__, - atomic_read(&slot->kobj.kref.refcount) - 1, - pci_domain_nr(slot->bus), slot->bus->number, slot->number); + dev_dbg(&slot->bus->dev, "dev %02x, dec refcount to %d\n", + slot->number, atomic_read(&slot->kobj.kref.refcount) - 1); down_write(&pci_bus_sem); kobject_put(&slot->kobj); diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 81450fbd8b12..09d5cd33a3f6 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -129,13 +129,14 @@ comment "I2C RTC drivers" if I2C config RTC_DRV_DS1307 - tristate "Dallas/Maxim DS1307/37/38/39/40, ST M41T00" + tristate "Dallas/Maxim DS1307/37/38/39/40, ST M41T00, EPSON RX-8025" help If you say yes here you get support for various compatible RTC chips (often with battery backup) connected with I2C. This driver should handle DS1307, DS1337, DS1338, DS1339, DS1340, ST M41T00, - and probably other chips. In some cases the RTC must already - have been initialized (by manufacturing or a bootloader). + EPSON RX-8025 and probably other chips. In some cases the RTC + must already have been initialized (by manufacturing or a + bootloader). The first seven registers on these chips hold an RTC, and other registers may add features such as NVRAM, a trickle charger for @@ -440,6 +441,16 @@ config RTC_DRV_DS1742 This driver can also be built as a module. If so, the module will be called rtc-ds1742. +config RTC_DRV_EFI + tristate "EFI RTC" + depends on IA64 + help + If you say yes here you will get support for the EFI + Real Time Clock. + + This driver can also be built as a module. If so, the module + will be called rtc-efi. + config RTC_DRV_STK17TA8 tristate "Simtek STK17TA8" depends on RTC_CLASS diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 0e697aa51caa..e7b09986d26e 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_RTC_DRV_DS1553) += rtc-ds1553.o obj-$(CONFIG_RTC_DRV_DS1672) += rtc-ds1672.o obj-$(CONFIG_RTC_DRV_DS1742) += rtc-ds1742.o obj-$(CONFIG_RTC_DRV_DS3234) += rtc-ds3234.o +obj-$(CONFIG_RTC_DRV_EFI) += rtc-efi.o obj-$(CONFIG_RTC_DRV_EP93XX) += rtc-ep93xx.o obj-$(CONFIG_RTC_DRV_FM3130) += rtc-fm3130.o obj-$(CONFIG_RTC_DRV_ISL1208) += rtc-isl1208.o diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 7e5155e88ac7..2c4a65302a9d 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -3,6 +3,7 @@ * * Copyright (C) 2005 James Chapman (ds1337 core) * Copyright (C) 2006 David Brownell + * Copyright (C) 2009 Matthias Fuchs (rx8025 support) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -31,6 +32,7 @@ enum ds_type { ds_1339, ds_1340, m41t00, + rx_8025, // rs5c372 too? different address... }; @@ -83,6 +85,12 @@ enum ds_type { #define DS1339_REG_ALARM1_SECS 0x07 #define DS1339_REG_TRICKLE 0x10 +#define RX8025_REG_CTRL1 0x0e +# define RX8025_BIT_2412 0x20 +#define RX8025_REG_CTRL2 0x0f +# define RX8025_BIT_PON 0x10 +# define RX8025_BIT_VDET 0x40 +# define RX8025_BIT_XST 0x20 struct ds1307 { @@ -94,6 +102,10 @@ struct ds1307 { struct i2c_client *client; struct rtc_device *rtc; struct work_struct work; + s32 (*read_block_data)(struct i2c_client *client, u8 command, + u8 length, u8 *values); + s32 (*write_block_data)(struct i2c_client *client, u8 command, + u8 length, const u8 *values); }; struct chip_desc { @@ -117,6 +129,8 @@ static const struct chip_desc chips[] = { [ds_1340] = { }, [m41t00] = { +}, +[rx_8025] = { }, }; static const struct i2c_device_id ds1307_id[] = { @@ -126,12 +140,86 @@ static const struct i2c_device_id ds1307_id[] = { { "ds1339", ds_1339 }, { "ds1340", ds_1340 }, { "m41t00", m41t00 }, + { "rx8025", rx_8025 }, { } }; MODULE_DEVICE_TABLE(i2c, ds1307_id); /*----------------------------------------------------------------------*/ +#define BLOCK_DATA_MAX_TRIES 10 + +static s32 ds1307_read_block_data_once(struct i2c_client *client, u8 command, + u8 length, u8 *values) +{ + s32 i, data; + + for (i = 0; i < length; i++) { + data = i2c_smbus_read_byte_data(client, command + i); + if (data < 0) + return data; + values[i] = data; + } + return i; +} + +static s32 ds1307_read_block_data(struct i2c_client *client, u8 command, + u8 length, u8 *values) +{ + u8 oldvalues[I2C_SMBUS_BLOCK_MAX]; + s32 ret; + int tries = 0; + + dev_dbg(&client->dev, "ds1307_read_block_data (length=%d)\n", length); + ret = ds1307_read_block_data_once(client, command, length, values); + if (ret < 0) + return ret; + do { + if (++tries > BLOCK_DATA_MAX_TRIES) { + dev_err(&client->dev, + "ds1307_read_block_data failed\n"); + return -EIO; + } + memcpy(oldvalues, values, length); + ret = ds1307_read_block_data_once(client, command, length, + values); + if (ret < 0) + return ret; + } while (memcmp(oldvalues, values, length)); + return length; +} + +static s32 ds1307_write_block_data(struct i2c_client *client, u8 command, + u8 length, const u8 *values) +{ + u8 currvalues[I2C_SMBUS_BLOCK_MAX]; + int tries = 0; + + dev_dbg(&client->dev, "ds1307_write_block_data (length=%d)\n", length); + do { + s32 i, ret; + + if (++tries > BLOCK_DATA_MAX_TRIES) { + dev_err(&client->dev, + "ds1307_write_block_data failed\n"); + return -EIO; + } + for (i = 0; i < length; i++) { + ret = i2c_smbus_write_byte_data(client, command + i, + values[i]); + if (ret < 0) + return ret; + } + ret = ds1307_read_block_data_once(client, command, length, + currvalues); + if (ret < 0) + return ret; + } while (memcmp(currvalues, values, length)); + return length; +} + +/*----------------------------------------------------------------------*/ + /* * The IRQ logic includes a "real" handler running in IRQ context just * long enough to schedule this workqueue entry. We need a task context @@ -202,7 +290,7 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t) int tmp; /* read the RTC date and time registers all at once */ - tmp = i2c_smbus_read_i2c_block_data(ds1307->client, + tmp = ds1307->read_block_data(ds1307->client, DS1307_REG_SECS, 7, ds1307->regs); if (tmp != 7) { dev_err(dev, "%s error %d\n", "read", tmp); @@ -279,7 +367,7 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t) "write", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6]); - result = i2c_smbus_write_i2c_block_data(ds1307->client, 0, 7, buf); + result = ds1307->write_block_data(ds1307->client, 0, 7, buf); if (result < 0) { dev_err(dev, "%s error %d\n", "write", result); return result; @@ -297,7 +385,7 @@ static int ds1337_read_alarm(struct device *dev, struct rtc_wkalrm *t) return -EINVAL; /* read all ALARM1, ALARM2, and status registers at once */ - ret = i2c_smbus_read_i2c_block_data(client, + ret = ds1307->read_block_data(client, DS1339_REG_ALARM1_SECS, 9, ds1307->regs); if (ret != 9) { dev_err(dev, "%s error %d\n", "alarm read", ret); @@ -356,7 +444,7 @@ static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t) t->enabled, t->pending); /* read current status of both alarms and the chip */ - ret = i2c_smbus_read_i2c_block_data(client, + ret = ds1307->read_block_data(client, DS1339_REG_ALARM1_SECS, 9, buf); if (ret != 9) { dev_err(dev, "%s error %d\n", "alarm write", ret); @@ -391,7 +479,7 @@ static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t) } buf[8] = status & ~(DS1337_BIT_A1I | DS1337_BIT_A2I); - ret = i2c_smbus_write_i2c_block_data(client, + ret = ds1307->write_block_data(client, DS1339_REG_ALARM1_SECS, 9, buf); if (ret < 0) { dev_err(dev, "can't set alarm time\n"); @@ -479,7 +567,7 @@ ds1307_nvram_read(struct kobject *kobj, struct bin_attribute *attr, if (unlikely(!count)) return count; - result = i2c_smbus_read_i2c_block_data(client, 8 + off, count, buf); + result = ds1307->read_block_data(client, 8 + off, count, buf); if (result < 0) dev_err(&client->dev, "%s error %d\n", "nvram read", result); return result; @@ -490,9 +578,11 @@ ds1307_nvram_write(struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct i2c_client *client; + struct ds1307 *ds1307; int result; client = kobj_to_i2c_client(kobj); + ds1307 = i2c_get_clientdata(client); if (unlikely(off >= NVRAM_SIZE)) return -EFBIG; @@ -501,7 +591,7 @@ ds1307_nvram_write(struct kobject *kobj, struct bin_attribute *attr, if (unlikely(!count)) return count; - result = i2c_smbus_write_i2c_block_data(client, 8 + off, count, buf); + result = ds1307->write_block_data(client, 8 + off, count, buf); if (result < 0) { dev_err(&client->dev, "%s error %d\n", "nvram write", result); return result; @@ -535,9 +625,8 @@ static int __devinit ds1307_probe(struct i2c_client *client, int want_irq = false; unsigned char *buf; - if (!i2c_check_functionality(adapter, - I2C_FUNC_SMBUS_WRITE_BYTE_DATA | - I2C_FUNC_SMBUS_I2C_BLOCK)) + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA) + && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) return -EIO; if (!(ds1307 = kzalloc(sizeof(struct ds1307), GFP_KERNEL))) @@ -547,6 +636,13 @@ static int __devinit ds1307_probe(struct i2c_client *client, i2c_set_clientdata(client, ds1307); ds1307->type = id->driver_data; buf = ds1307->regs; + if (i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) { + ds1307->read_block_data = i2c_smbus_read_i2c_block_data; + ds1307->write_block_data = i2c_smbus_write_i2c_block_data; + } else { + ds1307->read_block_data = ds1307_read_block_data; + ds1307->write_block_data = ds1307_write_block_data; + } switch (ds1307->type) { case ds_1337: @@ -557,7 +653,7 @@ static int __devinit ds1307_probe(struct i2c_client *client, want_irq = true; } /* get registers that the "rtc" read below won't read... */ - tmp = i2c_smbus_read_i2c_block_data(ds1307->client, + tmp = ds1307->read_block_data(ds1307->client, DS1337_REG_CONTROL, 2, buf); if (tmp != 2) { pr_debug("read error %d\n", tmp); @@ -589,13 +685,79 @@ static int __devinit ds1307_probe(struct i2c_client *client, dev_warn(&client->dev, "SET TIME!\n"); } break; + + case rx_8025: + tmp = i2c_smbus_read_i2c_block_data(ds1307->client, + RX8025_REG_CTRL1 << 4 | 0x08, 2, buf); + if (tmp != 2) { + pr_debug("read error %d\n", tmp); + err = -EIO; + goto exit_free; + } + + /* oscillator off? turn it on, so clock can tick. */ + if (!(ds1307->regs[1] & RX8025_BIT_XST)) { + ds1307->regs[1] |= RX8025_BIT_XST; + i2c_smbus_write_byte_data(client, + RX8025_REG_CTRL2 << 4 | 0x08, + ds1307->regs[1]); + dev_warn(&client->dev, + "oscillator stop detected - SET TIME!\n"); + } + + if (ds1307->regs[1] & RX8025_BIT_PON) { + ds1307->regs[1] &= ~RX8025_BIT_PON; + i2c_smbus_write_byte_data(client, + RX8025_REG_CTRL2 << 4 | 0x08, + ds1307->regs[1]); + dev_warn(&client->dev, "power-on detected\n"); + } + + if (ds1307->regs[1] & RX8025_BIT_VDET) { + ds1307->regs[1] &= ~RX8025_BIT_VDET; + i2c_smbus_write_byte_data(client, + RX8025_REG_CTRL2 << 4 | 0x08, + ds1307->regs[1]); + dev_warn(&client->dev, "voltage drop detected\n"); + } + + /* make sure we are running in 24hour mode */ + if (!(ds1307->regs[0] & RX8025_BIT_2412)) { + u8 hour; + + /* switch to 24 hour mode */ + i2c_smbus_write_byte_data(client, + RX8025_REG_CTRL1 << 4 | 0x08, + ds1307->regs[0] | + RX8025_BIT_2412); + + tmp = i2c_smbus_read_i2c_block_data(ds1307->client, + RX8025_REG_CTRL1 << 4 | 0x08, 2, buf); + if (tmp != 2) { + pr_debug("read error %d\n", tmp); + err = -EIO; + goto exit_free; + } + + /* correct hour */ + hour = bcd2bin(ds1307->regs[DS1307_REG_HOUR]); + if (hour == 12) + hour = 0; + if (ds1307->regs[DS1307_REG_HOUR] & DS1307_BIT_PM) + hour += 12; + + i2c_smbus_write_byte_data(client, + DS1307_REG_HOUR << 4 | 0x08, + hour); + } + break; default: break; } read_rtc: /* read RTC registers */ - tmp = i2c_smbus_read_i2c_block_data(ds1307->client, 0, 8, buf); + tmp = ds1307->read_block_data(ds1307->client, 0, 8, buf); if (tmp != 8) { pr_debug("read error %d\n", tmp); err = -EIO; @@ -649,6 +811,7 @@ read_rtc: dev_warn(&client->dev, "SET TIME!\n"); } break; + case rx_8025: case ds_1337: case ds_1339: break; @@ -662,6 +825,8 @@ read_rtc: * systems that will run through year 2100. */ break; + case rx_8025: + break; default: if (!(tmp & DS1307_BIT_12HR)) break; diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c index a5b0fc09f0c6..4d32e328f6cd 100644 --- a/drivers/rtc/rtc-ds1374.c +++ b/drivers/rtc/rtc-ds1374.c @@ -222,16 +222,16 @@ static int ds1374_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) rtc_tm_to_time(&alarm->time, &new_alarm); rtc_tm_to_time(&now, &itime); - new_alarm -= itime; - /* This can happen due to races, in addition to dates that are * truly in the past. To avoid requiring the caller to check for * races, dates in the past are assumed to be in the recent past * (i.e. not something that we'd rather the caller know about via * an error), and the alarm is set to go off as soon as possible. */ - if (new_alarm <= 0) + if (time_before_eq(new_alarm, itime)) new_alarm = 1; + else + new_alarm -= itime; mutex_lock(&ds1374->mutex); diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c new file mode 100644 index 000000000000..550292304b0f --- /dev/null +++ b/drivers/rtc/rtc-efi.c @@ -0,0 +1,235 @@ +/* + * rtc-efi: RTC Class Driver for EFI-based systems + * + * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. + * + * Author: dann frazier <dannf@hp.com> + * Based on efirtc.c by Stephane Eranian + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/time.h> +#include <linux/platform_device.h> +#include <linux/rtc.h> +#include <linux/efi.h> + +#define EFI_ISDST (EFI_TIME_ADJUST_DAYLIGHT|EFI_TIME_IN_DAYLIGHT) +/* + * EFI Epoch is 1/1/1998 + */ +#define EFI_RTC_EPOCH 1998 + +/* + * returns day of the year [0-365] + */ +static inline int +compute_yday(efi_time_t *eft) +{ + /* efi_time_t.month is in the [1-12] so, we need -1 */ + return rtc_year_days(eft->day - 1, eft->month - 1, eft->year); +} +/* + * returns day of the week [0-6] 0=Sunday + * + * Don't try to provide a year that's before 1998, please ! + */ +static int +compute_wday(efi_time_t *eft) +{ + int y; + int ndays = 0; + + if (eft->year < 1998) { + printk(KERN_ERR "efirtc: EFI year < 1998, invalid date\n"); + return -1; + } + + for (y = EFI_RTC_EPOCH; y < eft->year; y++) + ndays += 365 + (is_leap_year(y) ? 1 : 0); + + ndays += compute_yday(eft); + + /* + * 4=1/1/1998 was a Thursday + */ + return (ndays + 4) % 7; +} + +static void +convert_to_efi_time(struct rtc_time *wtime, efi_time_t *eft) +{ + eft->year = wtime->tm_year + 1900; + eft->month = wtime->tm_mon + 1; + eft->day = wtime->tm_mday; + eft->hour = wtime->tm_hour; + eft->minute = wtime->tm_min; + eft->second = wtime->tm_sec; + eft->nanosecond = 0; + eft->daylight = wtime->tm_isdst ? EFI_ISDST : 0; + eft->timezone = EFI_UNSPECIFIED_TIMEZONE; +} + +static void +convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime) +{ + memset(wtime, 0, sizeof(*wtime)); + wtime->tm_sec = eft->second; + wtime->tm_min = eft->minute; + wtime->tm_hour = eft->hour; + wtime->tm_mday = eft->day; + wtime->tm_mon = eft->month - 1; + wtime->tm_year = eft->year - 1900; + + /* day of the week [0-6], Sunday=0 */ + wtime->tm_wday = compute_wday(eft); + + /* day in the year [1-365]*/ + wtime->tm_yday = compute_yday(eft); + + + switch (eft->daylight & EFI_ISDST) { + case EFI_ISDST: + wtime->tm_isdst = 1; + break; + case EFI_TIME_ADJUST_DAYLIGHT: + wtime->tm_isdst = 0; + break; + default: + wtime->tm_isdst = -1; + } +} + +static int efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm) +{ + efi_time_t eft; + efi_status_t status; + + /* + * As of EFI v1.10, this call always returns an unsupported status + */ + status = efi.get_wakeup_time((efi_bool_t *)&wkalrm->enabled, + (efi_bool_t *)&wkalrm->pending, &eft); + + if (status != EFI_SUCCESS) + return -EINVAL; + + convert_from_efi_time(&eft, &wkalrm->time); + + return rtc_valid_tm(&wkalrm->time); +} + +static int efi_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm) +{ + efi_time_t eft; + efi_status_t status; + + convert_to_efi_time(&wkalrm->time, &eft); + + /* + * XXX Fixme: + * As of EFI 0.92 with the firmware I have on my + * machine this call does not seem to work quite + * right + * + * As of v1.10, this call always returns an unsupported status + */ + status = efi.set_wakeup_time((efi_bool_t)wkalrm->enabled, &eft); + + printk(KERN_WARNING "write status is %d\n", (int)status); + + return status == EFI_SUCCESS ? 0 : -EINVAL; +} + +static int efi_read_time(struct device *dev, struct rtc_time *tm) +{ + efi_status_t status; + efi_time_t eft; + efi_time_cap_t cap; + + status = efi.get_time(&eft, &cap); + + if (status != EFI_SUCCESS) { + /* should never happen */ + printk(KERN_ERR "efitime: can't read time\n"); + return -EINVAL; + } + + convert_from_efi_time(&eft, tm); + + return rtc_valid_tm(tm); +} + +static int efi_set_time(struct device *dev, struct rtc_time *tm) +{ + efi_status_t status; + efi_time_t eft; + + convert_to_efi_time(tm, &eft); + + status = efi.set_time(&eft); + + return status == EFI_SUCCESS ? 0 : -EINVAL; +} + +static const struct rtc_class_ops efi_rtc_ops = { + .read_time = efi_read_time, + .set_time = efi_set_time, + .read_alarm = efi_read_alarm, + .set_alarm = efi_set_alarm, +}; + +static int __init efi_rtc_probe(struct platform_device *dev) +{ + struct rtc_device *rtc; + + rtc = rtc_device_register("rtc-efi", &dev->dev, &efi_rtc_ops, + THIS_MODULE); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); + + platform_set_drvdata(dev, rtc); + + return 0; +} + +static int __exit efi_rtc_remove(struct platform_device *dev) +{ + struct rtc_device *rtc = platform_get_drvdata(dev); + + rtc_device_unregister(rtc); + + return 0; +} + +static struct platform_driver efi_rtc_driver = { + .driver = { + .name = "rtc-efi", + .owner = THIS_MODULE, + }, + .probe = efi_rtc_probe, + .remove = __exit_p(efi_rtc_remove), +}; + +static int __init efi_rtc_init(void) +{ + return platform_driver_probe(&efi_rtc_driver, efi_rtc_probe); +} + +static void __exit efi_rtc_exit(void) +{ + platform_driver_unregister(&efi_rtc_driver); +} + +module_init(efi_rtc_init); +module_exit(efi_rtc_exit); + +MODULE_AUTHOR("dann frazier <dannf@hp.com>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("EFI RTC driver"); diff --git a/drivers/rtc/rtc-lib.c b/drivers/rtc/rtc-lib.c index dd70bf73ce9d..773851f338b8 100644 --- a/drivers/rtc/rtc-lib.c +++ b/drivers/rtc/rtc-lib.c @@ -26,14 +26,13 @@ static const unsigned short rtc_ydays[2][13] = { }; #define LEAPS_THRU_END_OF(y) ((y)/4 - (y)/100 + (y)/400) -#define LEAP_YEAR(year) ((!(year % 4) && (year % 100)) || !(year % 400)) /* * The number of days in the month. */ int rtc_month_days(unsigned int month, unsigned int year) { - return rtc_days_in_month[month] + (LEAP_YEAR(year) && month == 1); + return rtc_days_in_month[month] + (is_leap_year(year) && month == 1); } EXPORT_SYMBOL(rtc_month_days); @@ -42,7 +41,7 @@ EXPORT_SYMBOL(rtc_month_days); */ int rtc_year_days(unsigned int day, unsigned int month, unsigned int year) { - return rtc_ydays[LEAP_YEAR(year)][month] + day-1; + return rtc_ydays[is_leap_year(year)][month] + day-1; } EXPORT_SYMBOL(rtc_year_days); @@ -66,7 +65,7 @@ void rtc_time_to_tm(unsigned long time, struct rtc_time *tm) - LEAPS_THRU_END_OF(1970 - 1); if (days < 0) { year -= 1; - days += 365 + LEAP_YEAR(year); + days += 365 + is_leap_year(year); } tm->tm_year = year - 1900; tm->tm_yday = days + 1; diff --git a/drivers/rtc/rtc-parisc.c b/drivers/rtc/rtc-parisc.c index c6bfa6fe1a2a..b966f56da976 100644 --- a/drivers/rtc/rtc-parisc.c +++ b/drivers/rtc/rtc-parisc.c @@ -7,41 +7,25 @@ #include <linux/module.h> #include <linux/time.h> #include <linux/platform_device.h> +#include <linux/rtc.h> #include <asm/rtc.h> -/* as simple as can be, and no simpler. */ -struct parisc_rtc { - struct rtc_device *rtc; - spinlock_t lock; -}; - static int parisc_get_time(struct device *dev, struct rtc_time *tm) { - struct parisc_rtc *p = dev_get_drvdata(dev); - unsigned long flags, ret; + unsigned long ret; - spin_lock_irqsave(&p->lock, flags); ret = get_rtc_time(tm); - spin_unlock_irqrestore(&p->lock, flags); if (ret & RTC_BATT_BAD) return -EOPNOTSUPP; - return 0; + return rtc_valid_tm(tm); } static int parisc_set_time(struct device *dev, struct rtc_time *tm) { - struct parisc_rtc *p = dev_get_drvdata(dev); - unsigned long flags; - int ret; - - spin_lock_irqsave(&p->lock, flags); - ret = set_rtc_time(tm); - spin_unlock_irqrestore(&p->lock, flags); - - if (ret < 0) + if (set_rtc_time(tm) < 0) return -EOPNOTSUPP; return 0; @@ -52,35 +36,25 @@ static const struct rtc_class_ops parisc_rtc_ops = { .set_time = parisc_set_time, }; -static int __devinit parisc_rtc_probe(struct platform_device *dev) +static int __init parisc_rtc_probe(struct platform_device *dev) { - struct parisc_rtc *p; - - p = kzalloc(sizeof (*p), GFP_KERNEL); - if (!p) - return -ENOMEM; - - spin_lock_init(&p->lock); + struct rtc_device *rtc; - p->rtc = rtc_device_register("rtc-parisc", &dev->dev, &parisc_rtc_ops, - THIS_MODULE); - if (IS_ERR(p->rtc)) { - int err = PTR_ERR(p->rtc); - kfree(p); - return err; - } + rtc = rtc_device_register("rtc-parisc", &dev->dev, &parisc_rtc_ops, + THIS_MODULE); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); - platform_set_drvdata(dev, p); + platform_set_drvdata(dev, rtc); return 0; } -static int __devexit parisc_rtc_remove(struct platform_device *dev) +static int __exit parisc_rtc_remove(struct platform_device *dev) { - struct parisc_rtc *p = platform_get_drvdata(dev); + struct rtc_device *rtc = platform_get_drvdata(dev); - rtc_device_unregister(p->rtc); - kfree(p); + rtc_device_unregister(rtc); return 0; } @@ -96,7 +70,7 @@ static struct platform_driver parisc_rtc_driver = { static int __init parisc_rtc_init(void) { - return platform_driver_register(&parisc_rtc_driver); + return platform_driver_probe(&parisc_rtc_driver, parisc_rtc_probe); } static void __exit parisc_rtc_fini(void) diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c index 14d4f036a768..66955cc9c746 100644 --- a/drivers/rtc/rtc-v3020.c +++ b/drivers/rtc/rtc-v3020.c @@ -28,7 +28,7 @@ #include <linux/rtc-v3020.h> #include <linux/delay.h> -#include <asm/io.h> +#include <linux/io.h> #undef DEBUG @@ -63,7 +63,7 @@ static void v3020_set_reg(struct v3020 *chip, unsigned char address, static unsigned char v3020_get_reg(struct v3020 *chip, unsigned char address) { - unsigned int data=0; + unsigned int data = 0; int i; for (i = 0; i < 4; i++) { @@ -106,16 +106,14 @@ static int v3020_read_time(struct device *dev, struct rtc_time *dt) tmp = v3020_get_reg(chip, V3020_YEAR); dt->tm_year = bcd2bin(tmp)+100; -#ifdef DEBUG - printk("\n%s : Read RTC values\n",__func__); - printk("tm_hour: %i\n",dt->tm_hour); - printk("tm_min : %i\n",dt->tm_min); - printk("tm_sec : %i\n",dt->tm_sec); - printk("tm_year: %i\n",dt->tm_year); - printk("tm_mon : %i\n",dt->tm_mon); - printk("tm_mday: %i\n",dt->tm_mday); - printk("tm_wday: %i\n",dt->tm_wday); -#endif + dev_dbg(dev, "\n%s : Read RTC values\n", __func__); + dev_dbg(dev, "tm_hour: %i\n", dt->tm_hour); + dev_dbg(dev, "tm_min : %i\n", dt->tm_min); + dev_dbg(dev, "tm_sec : %i\n", dt->tm_sec); + dev_dbg(dev, "tm_year: %i\n", dt->tm_year); + dev_dbg(dev, "tm_mon : %i\n", dt->tm_mon); + dev_dbg(dev, "tm_mday: %i\n", dt->tm_mday); + dev_dbg(dev, "tm_wday: %i\n", dt->tm_wday); return 0; } @@ -125,15 +123,13 @@ static int v3020_set_time(struct device *dev, struct rtc_time *dt) { struct v3020 *chip = dev_get_drvdata(dev); -#ifdef DEBUG - printk("\n%s : Setting RTC values\n",__func__); - printk("tm_sec : %i\n",dt->tm_sec); - printk("tm_min : %i\n",dt->tm_min); - printk("tm_hour: %i\n",dt->tm_hour); - printk("tm_mday: %i\n",dt->tm_mday); - printk("tm_wday: %i\n",dt->tm_wday); - printk("tm_year: %i\n",dt->tm_year); -#endif + dev_dbg(dev, "\n%s : Setting RTC values\n", __func__); + dev_dbg(dev, "tm_sec : %i\n", dt->tm_sec); + dev_dbg(dev, "tm_min : %i\n", dt->tm_min); + dev_dbg(dev, "tm_hour: %i\n", dt->tm_hour); + dev_dbg(dev, "tm_mday: %i\n", dt->tm_mday); + dev_dbg(dev, "tm_wday: %i\n", dt->tm_wday); + dev_dbg(dev, "tm_year: %i\n", dt->tm_year); /* Write all the values to ram... */ v3020_set_reg(chip, V3020_SECONDS, bin2bcd(dt->tm_sec)); @@ -191,7 +187,7 @@ static int rtc_probe(struct platform_device *pdev) /* Test chip by doing a write/read sequence * to the chip ram */ v3020_set_reg(chip, V3020_SECONDS, 0x33); - if(v3020_get_reg(chip, V3020_SECONDS) != 0x33) { + if (v3020_get_reg(chip, V3020_SECONDS) != 0x33) { retval = -ENODEV; goto err_io; } diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c index 5c5e3aa91385..c91edc572eb6 100644 --- a/drivers/rtc/rtc-wm8350.c +++ b/drivers/rtc/rtc-wm8350.c @@ -122,7 +122,7 @@ static int wm8350_rtc_settime(struct device *dev, struct rtc_time *tm) do { rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); schedule_timeout_uninterruptible(msecs_to_jiffies(1)); - } while (retries-- && !(rtc_ctrl & WM8350_RTC_STS)); + } while (--retries && !(rtc_ctrl & WM8350_RTC_STS)); if (!retries) { dev_err(dev, "timed out on set confirmation\n"); @@ -236,6 +236,17 @@ static int wm8350_rtc_start_alarm(struct wm8350 *wm8350) return 0; } +static int wm8350_rtc_alarm_irq_enable(struct device *dev, + unsigned int enabled) +{ + struct wm8350 *wm8350 = dev_get_drvdata(dev); + + if (enabled) + return wm8350_rtc_start_alarm(wm8350); + else + return wm8350_rtc_stop_alarm(wm8350); +} + static int wm8350_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) { struct wm8350 *wm8350 = dev_get_drvdata(dev); @@ -291,30 +302,15 @@ static int wm8350_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) return ret; } -/* - * Handle commands from user-space - */ -static int wm8350_rtc_ioctl(struct device *dev, unsigned int cmd, - unsigned long arg) +static int wm8350_rtc_update_irq_enable(struct device *dev, + unsigned int enabled) { struct wm8350 *wm8350 = dev_get_drvdata(dev); - switch (cmd) { - case RTC_AIE_OFF: - return wm8350_rtc_stop_alarm(wm8350); - case RTC_AIE_ON: - return wm8350_rtc_start_alarm(wm8350); - - case RTC_UIE_OFF: - wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC); - break; - case RTC_UIE_ON: + if (enabled) wm8350_unmask_irq(wm8350, WM8350_IRQ_RTC_SEC); - break; - - default: - return -ENOIOCTLCMD; - } + else + wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC); return 0; } @@ -345,11 +341,12 @@ static void wm8350_rtc_update_handler(struct wm8350 *wm8350, int irq, } static const struct rtc_class_ops wm8350_rtc_ops = { - .ioctl = wm8350_rtc_ioctl, .read_time = wm8350_rtc_readtime, .set_time = wm8350_rtc_settime, .read_alarm = wm8350_rtc_readalarm, .set_alarm = wm8350_rtc_setalarm, + .alarm_irq_enable = wm8350_rtc_alarm_irq_enable, + .update_irq_enable = wm8350_rtc_update_irq_enable, }; #ifdef CONFIG_PM @@ -440,7 +437,7 @@ static int wm8350_rtc_probe(struct platform_device *pdev) do { timectl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); - } while (timectl & WM8350_RTC_STS && retries--); + } while (timectl & WM8350_RTC_STS && --retries); if (retries == 0) { dev_err(&pdev->dev, "failed to start: timeout\n"); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 2fd64e5a9ab2..0570794ccf1c 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2363,6 +2363,7 @@ int dasd_generic_notify(struct ccw_device *cdev, int event) ret = 0; switch (event) { case CIO_GONE: + case CIO_BOXED: case CIO_NO_PATH: /* First of all call extended error reporting. */ dasd_eer_write(device, NULL, DASD_EER_NOPATH); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index c4d2f667a2f6..35441fa16be1 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -310,8 +310,6 @@ static void ccw_device_remove_orphan_cb(struct work_struct *work) put_device(&cdev->dev); } -static void ccw_device_call_sch_unregister(struct work_struct *work); - static void ccw_device_remove_disconnected(struct ccw_device *cdev) { @@ -335,11 +333,10 @@ ccw_device_remove_disconnected(struct ccw_device *cdev) spin_unlock_irqrestore(cdev->ccwlock, flags); PREPARE_WORK(&cdev->private->kick_work, ccw_device_remove_orphan_cb); + queue_work(slow_path_wq, &cdev->private->kick_work); } else /* Deregister subchannel, which will kill the ccw device. */ - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister); - queue_work(slow_path_wq, &cdev->private->kick_work); + ccw_device_schedule_sch_unregister(cdev); } /** @@ -471,7 +468,7 @@ static int online_store_recog_and_online(struct ccw_device *cdev) int ret; /* Do device recognition, if needed. */ - if (cdev->id.cu_type == 0) { + if (cdev->private->state == DEV_STATE_BOXED) { ret = ccw_device_recognition(cdev); if (ret) { CIO_MSG_EVENT(0, "Couldn't start recognition " @@ -482,17 +479,21 @@ static int online_store_recog_and_online(struct ccw_device *cdev) } wait_event(cdev->private->wait_q, cdev->private->flags.recog_done); + if (cdev->private->state != DEV_STATE_OFFLINE) + /* recognition failed */ + return -EAGAIN; } if (cdev->drv && cdev->drv->set_online) ccw_device_set_online(cdev); return 0; } + static int online_store_handle_online(struct ccw_device *cdev, int force) { int ret; ret = online_store_recog_and_online(cdev); - if (ret) + if (ret && !force) return ret; if (force && cdev->private->state == DEV_STATE_BOXED) { ret = ccw_device_stlck(cdev); @@ -500,7 +501,9 @@ static int online_store_handle_online(struct ccw_device *cdev, int force) return ret; if (cdev->id.cu_type == 0) cdev->private->state = DEV_STATE_NOT_OPER; - online_store_recog_and_online(cdev); + ret = online_store_recog_and_online(cdev); + if (ret) + return ret; } return 0; } @@ -512,7 +515,11 @@ static ssize_t online_store (struct device *dev, struct device_attribute *attr, int force, ret; unsigned long i; - if (atomic_cmpxchg(&cdev->private->onoff, 0, 1) != 0) + if ((cdev->private->state != DEV_STATE_OFFLINE && + cdev->private->state != DEV_STATE_ONLINE && + cdev->private->state != DEV_STATE_BOXED && + cdev->private->state != DEV_STATE_DISCONNECTED) || + atomic_cmpxchg(&cdev->private->onoff, 0, 1) != 0) return -EAGAIN; if (cdev->drv && !try_module_get(cdev->drv->owner)) { @@ -1014,6 +1021,13 @@ static void ccw_device_call_sch_unregister(struct work_struct *work) put_device(&sch->dev); } +void ccw_device_schedule_sch_unregister(struct ccw_device *cdev) +{ + PREPARE_WORK(&cdev->private->kick_work, + ccw_device_call_sch_unregister); + queue_work(slow_path_wq, &cdev->private->kick_work); +} + /* * subchannel recognition done. Called from the state machine. */ @@ -1025,19 +1039,17 @@ io_subchannel_recog_done(struct ccw_device *cdev) return; } switch (cdev->private->state) { + case DEV_STATE_BOXED: + /* Device did not respond in time. */ case DEV_STATE_NOT_OPER: cdev->private->flags.recog_done = 1; /* Remove device found not operational. */ if (!get_device(&cdev->dev)) break; - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister); - queue_work(slow_path_wq, &cdev->private->kick_work); + ccw_device_schedule_sch_unregister(cdev); if (atomic_dec_and_test(&ccw_device_init_count)) wake_up(&ccw_device_init_wq); break; - case DEV_STATE_BOXED: - /* Device did not respond in time. */ case DEV_STATE_OFFLINE: /* * We can't register the device in interrupt context so @@ -1551,8 +1563,7 @@ static int purge_fn(struct device *dev, void *data) goto out; CIO_MSG_EVENT(3, "ccw: purging 0.%x.%04x\n", priv->dev_id.ssid, priv->dev_id.devno); - PREPARE_WORK(&cdev->private->kick_work, ccw_device_call_sch_unregister); - queue_work(slow_path_wq, &cdev->private->kick_work); + ccw_device_schedule_sch_unregister(cdev); out: /* Abort loop in case of pending signal. */ diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index 85e01846ca65..f1cbbd94ad4e 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -87,6 +87,7 @@ int ccw_device_is_orphan(struct ccw_device *); int ccw_device_recognition(struct ccw_device *); int ccw_device_online(struct ccw_device *); int ccw_device_offline(struct ccw_device *); +void ccw_device_schedule_sch_unregister(struct ccw_device *); int ccw_purge_blacklisted(void); /* Function prototypes for device status and basic sense stuff. */ diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 87b4bfca080f..e46049261561 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -256,13 +256,12 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) old_lpm = 0; if (sch->lpm != old_lpm) __recover_lost_chpids(sch, old_lpm); - if (cdev->private->state == DEV_STATE_DISCONNECTED_SENSE_ID) { - if (state == DEV_STATE_NOT_OPER) { - cdev->private->flags.recog_done = 1; - cdev->private->state = DEV_STATE_DISCONNECTED; - return; - } - /* Boxed devices don't need extra treatment. */ + if (cdev->private->state == DEV_STATE_DISCONNECTED_SENSE_ID && + (state == DEV_STATE_NOT_OPER || state == DEV_STATE_BOXED)) { + cdev->private->flags.recog_done = 1; + cdev->private->state = DEV_STATE_DISCONNECTED; + wake_up(&cdev->private->wait_q); + return; } notify = 0; same_dev = 0; /* Keep the compiler quiet... */ @@ -274,7 +273,7 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) sch->schid.ssid, sch->schid.sch_no); break; case DEV_STATE_OFFLINE: - if (cdev->private->state == DEV_STATE_DISCONNECTED_SENSE_ID) { + if (cdev->online) { same_dev = ccw_device_handle_oper(cdev); notify = 1; } @@ -307,12 +306,17 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) " subchannel 0.%x.%04x\n", cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no); + if (cdev->id.cu_type != 0) { /* device was recognized before */ + cdev->private->flags.recog_done = 1; + cdev->private->state = DEV_STATE_BOXED; + wake_up(&cdev->private->wait_q); + return; + } break; } cdev->private->state = state; io_subchannel_recog_done(cdev); - if (state != DEV_STATE_NOT_OPER) - wake_up(&cdev->private->wait_q); + wake_up(&cdev->private->wait_q); } /* @@ -390,10 +394,13 @@ ccw_device_done(struct ccw_device *cdev, int state) cdev->private->state = state; - - if (state == DEV_STATE_BOXED) + if (state == DEV_STATE_BOXED) { CIO_MSG_EVENT(0, "Boxed device %04x on subchannel %04x\n", cdev->private->dev_id.devno, sch->schid.sch_no); + if (cdev->online && !ccw_device_notify(cdev, CIO_BOXED)) + ccw_device_schedule_sch_unregister(cdev); + cdev->private->flags.donotify = 0; + } if (cdev->private->flags.donotify) { cdev->private->flags.donotify = 0; diff --git a/drivers/s390/net/qeth_core_offl.c b/drivers/s390/net/qeth_core_offl.c deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/drivers/s390/net/qeth_core_offl.c +++ /dev/null diff --git a/drivers/s390/net/qeth_core_offl.h b/drivers/s390/net/qeth_core_offl.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/drivers/s390/net/qeth_core_offl.h +++ /dev/null diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c index 1fe1e2eda512..cfb0dcb6e3ff 100644 --- a/drivers/s390/scsi/zfcp_ccw.c +++ b/drivers/s390/scsi/zfcp_ccw.c @@ -176,6 +176,11 @@ static int zfcp_ccw_notify(struct ccw_device *ccw_device, int event) zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_COMMON_ERP_FAILED, "ccnoti4", NULL); break; + case CIO_BOXED: + dev_warn(&adapter->ccw_device->dev, + "The ccw device did not respond in time.\n"); + zfcp_erp_adapter_shutdown(adapter, 0, "ccnoti5", NULL); + break; } return 1; } diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c index 42f4e66fccaf..bf3c0e32a334 100644 --- a/drivers/serial/serial_core.c +++ b/drivers/serial/serial_core.c @@ -27,6 +27,8 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/console.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/serial_core.h> #include <linux/smp_lock.h> #include <linux/device.h> @@ -1682,20 +1684,20 @@ static const char *uart_type(struct uart_port *port) #ifdef CONFIG_PROC_FS -static int uart_line_info(char *buf, struct uart_driver *drv, int i) +static void uart_line_info(struct seq_file *m, struct uart_driver *drv, int i) { struct uart_state *state = drv->state + i; int pm_state; struct uart_port *port = state->port; char stat_buf[32]; unsigned int status; - int mmio, ret; + int mmio; if (!port) - return 0; + return; mmio = port->iotype >= UPIO_MEM; - ret = sprintf(buf, "%d: uart:%s %s%08llX irq:%d", + seq_printf(m, "%d: uart:%s %s%08llX irq:%d", port->line, uart_type(port), mmio ? "mmio:0x" : "port:", mmio ? (unsigned long long)port->mapbase @@ -1703,8 +1705,8 @@ static int uart_line_info(char *buf, struct uart_driver *drv, int i) port->irq); if (port->type == PORT_UNKNOWN) { - strcat(buf, "\n"); - return ret + 1; + seq_putc(m, '\n'); + return; } if (capable(CAP_SYS_ADMIN)) { @@ -1719,19 +1721,19 @@ static int uart_line_info(char *buf, struct uart_driver *drv, int i) uart_change_pm(state, pm_state); mutex_unlock(&state->mutex); - ret += sprintf(buf + ret, " tx:%d rx:%d", + seq_printf(m, " tx:%d rx:%d", port->icount.tx, port->icount.rx); if (port->icount.frame) - ret += sprintf(buf + ret, " fe:%d", + seq_printf(m, " fe:%d", port->icount.frame); if (port->icount.parity) - ret += sprintf(buf + ret, " pe:%d", + seq_printf(m, " pe:%d", port->icount.parity); if (port->icount.brk) - ret += sprintf(buf + ret, " brk:%d", + seq_printf(m, " brk:%d", port->icount.brk); if (port->icount.overrun) - ret += sprintf(buf + ret, " oe:%d", + seq_printf(m, " oe:%d", port->icount.overrun); #define INFOBIT(bit, str) \ @@ -1753,45 +1755,39 @@ static int uart_line_info(char *buf, struct uart_driver *drv, int i) STATBIT(TIOCM_RNG, "|RI"); if (stat_buf[0]) stat_buf[0] = ' '; - strcat(stat_buf, "\n"); - ret += sprintf(buf + ret, stat_buf); - } else { - strcat(buf, "\n"); - ret++; + seq_puts(m, stat_buf); } + seq_putc(m, '\n'); #undef STATBIT #undef INFOBIT - return ret; } -static int uart_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int uart_proc_show(struct seq_file *m, void *v) { - struct tty_driver *ttydrv = data; + struct tty_driver *ttydrv = v; struct uart_driver *drv = ttydrv->driver_state; - int i, len = 0, l; - off_t begin = 0; + int i; - len += sprintf(page, "serinfo:1.0 driver%s%s revision:%s\n", + seq_printf(m, "serinfo:1.0 driver%s%s revision:%s\n", "", "", ""); - for (i = 0; i < drv->nr && len < PAGE_SIZE - 96; i++) { - l = uart_line_info(page + len, drv, i); - len += l; - if (len + begin > off + count) - goto done; - if (len + begin < off) { - begin += len; - len = 0; - } - } - *eof = 1; - done: - if (off >= len + begin) - return 0; - *start = page + (off - begin); - return (count < begin + len - off) ? count : (begin + len - off); + for (i = 0; i < drv->nr; i++) + uart_line_info(m, drv, i); + return 0; } + +static int uart_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, uart_proc_show, PDE(inode)->data); +} + +static const struct file_operations uart_proc_fops = { + .owner = THIS_MODULE, + .open = uart_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif #if defined(CONFIG_SERIAL_CORE_CONSOLE) || defined(CONFIG_CONSOLE_POLL) @@ -2299,7 +2295,7 @@ static const struct tty_operations uart_ops = { .break_ctl = uart_break_ctl, .wait_until_sent= uart_wait_until_sent, #ifdef CONFIG_PROC_FS - .read_proc = uart_read_proc, + .proc_fops = &uart_proc_fops, #endif .tiocmget = uart_tiocmget, .tiocmset = uart_tiocmset, diff --git a/drivers/spi/spi_mpc83xx.c b/drivers/spi/spi_mpc83xx.c index 44a2b46ccb79..f4573a96af24 100644 --- a/drivers/spi/spi_mpc83xx.c +++ b/drivers/spi/spi_mpc83xx.c @@ -14,6 +14,8 @@ #include <linux/init.h> #include <linux/types.h> #include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/err.h> #include <linux/completion.h> #include <linux/interrupt.h> #include <linux/delay.h> @@ -23,7 +25,13 @@ #include <linux/spi/spi_bitbang.h> #include <linux/platform_device.h> #include <linux/fsl_devices.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/gpio.h> +#include <linux/of_gpio.h> +#include <linux/of_spi.h> +#include <sysdev/fsl_soc.h> #include <asm/irq.h> #include <asm/io.h> @@ -79,7 +87,7 @@ struct mpc83xx_spi { u32(*get_tx) (struct mpc83xx_spi *); unsigned int count; - int irq; + unsigned int irq; unsigned nsecs; /* (clock cycle time)/2 */ @@ -89,9 +97,6 @@ struct mpc83xx_spi { bool qe_mode; - void (*activate_cs) (u8 cs, u8 polarity); - void (*deactivate_cs) (u8 cs, u8 polarity); - u8 busy; struct workqueue_struct *workqueue; @@ -123,6 +128,7 @@ static inline u32 mpc83xx_spi_read_reg(__be32 __iomem * reg) } #define MPC83XX_SPI_RX_BUF(type) \ +static \ void mpc83xx_spi_rx_buf_##type(u32 data, struct mpc83xx_spi *mpc83xx_spi) \ { \ type * rx = mpc83xx_spi->rx; \ @@ -131,6 +137,7 @@ void mpc83xx_spi_rx_buf_##type(u32 data, struct mpc83xx_spi *mpc83xx_spi) \ } #define MPC83XX_SPI_TX_BUF(type) \ +static \ u32 mpc83xx_spi_tx_buf_##type(struct mpc83xx_spi *mpc83xx_spi) \ { \ u32 data; \ @@ -151,15 +158,14 @@ MPC83XX_SPI_TX_BUF(u32) static void mpc83xx_spi_chipselect(struct spi_device *spi, int value) { - struct mpc83xx_spi *mpc83xx_spi; - u8 pol = spi->mode & SPI_CS_HIGH ? 1 : 0; + struct mpc83xx_spi *mpc83xx_spi = spi_master_get_devdata(spi->master); + struct fsl_spi_platform_data *pdata = spi->dev.parent->platform_data; + bool pol = spi->mode & SPI_CS_HIGH; struct spi_mpc83xx_cs *cs = spi->controller_state; - mpc83xx_spi = spi_master_get_devdata(spi->master); - if (value == BITBANG_CS_INACTIVE) { - if (mpc83xx_spi->deactivate_cs) - mpc83xx_spi->deactivate_cs(spi->chip_select, pol); + if (pdata->cs_control) + pdata->cs_control(spi, !pol); } if (value == BITBANG_CS_ACTIVE) { @@ -172,7 +178,7 @@ static void mpc83xx_spi_chipselect(struct spi_device *spi, int value) if (cs->hw_mode != regval) { unsigned long flags; - void *tmp_ptr = &mpc83xx_spi->base->mode; + __be32 __iomem *mode = &mpc83xx_spi->base->mode; regval = cs->hw_mode; /* Turn off IRQs locally to minimize time that @@ -180,12 +186,12 @@ static void mpc83xx_spi_chipselect(struct spi_device *spi, int value) */ local_irq_save(flags); /* Turn off SPI unit prior changing mode */ - mpc83xx_spi_write_reg(tmp_ptr, regval & ~SPMODE_ENABLE); - mpc83xx_spi_write_reg(tmp_ptr, regval); + mpc83xx_spi_write_reg(mode, regval & ~SPMODE_ENABLE); + mpc83xx_spi_write_reg(mode, regval); local_irq_restore(flags); } - if (mpc83xx_spi->activate_cs) - mpc83xx_spi->activate_cs(spi->chip_select, pol); + if (pdata->cs_control) + pdata->cs_control(spi, pol); } } @@ -284,7 +290,7 @@ int mpc83xx_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t) regval = mpc83xx_spi_read_reg(&mpc83xx_spi->base->mode); if (cs->hw_mode != regval) { unsigned long flags; - void *tmp_ptr = &mpc83xx_spi->base->mode; + __be32 __iomem *mode = &mpc83xx_spi->base->mode; regval = cs->hw_mode; /* Turn off IRQs locally to minimize time @@ -292,8 +298,8 @@ int mpc83xx_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t) */ local_irq_save(flags); /* Turn off SPI unit prior changing mode */ - mpc83xx_spi_write_reg(tmp_ptr, regval & ~SPMODE_ENABLE); - mpc83xx_spi_write_reg(tmp_ptr, regval); + mpc83xx_spi_write_reg(mode, regval & ~SPMODE_ENABLE); + mpc83xx_spi_write_reg(mode, regval); local_irq_restore(flags); } return 0; @@ -483,7 +489,7 @@ static int mpc83xx_spi_setup(struct spi_device *spi) return 0; } -irqreturn_t mpc83xx_spi_irq(s32 irq, void *context_data) +static irqreturn_t mpc83xx_spi_irq(s32 irq, void *context_data) { struct mpc83xx_spi *mpc83xx_spi = context_data; u32 event; @@ -545,43 +551,28 @@ static void mpc83xx_spi_cleanup(struct spi_device *spi) kfree(spi->controller_state); } -static int __init mpc83xx_spi_probe(struct platform_device *dev) +static struct spi_master * __devinit +mpc83xx_spi_probe(struct device *dev, struct resource *mem, unsigned int irq) { + struct fsl_spi_platform_data *pdata = dev->platform_data; struct spi_master *master; struct mpc83xx_spi *mpc83xx_spi; - struct fsl_spi_platform_data *pdata; - struct resource *r; u32 regval; int ret = 0; - /* Get resources(memory, IRQ) associated with the device */ - master = spi_alloc_master(&dev->dev, sizeof(struct mpc83xx_spi)); - + master = spi_alloc_master(dev, sizeof(struct mpc83xx_spi)); if (master == NULL) { ret = -ENOMEM; goto err; } - platform_set_drvdata(dev, master); - pdata = dev->dev.platform_data; + dev_set_drvdata(dev, master); - if (pdata == NULL) { - ret = -ENODEV; - goto free_master; - } - - r = platform_get_resource(dev, IORESOURCE_MEM, 0); - if (r == NULL) { - ret = -ENODEV; - goto free_master; - } master->setup = mpc83xx_spi_setup; master->transfer = mpc83xx_spi_transfer; master->cleanup = mpc83xx_spi_cleanup; mpc83xx_spi = spi_master_get_devdata(master); - mpc83xx_spi->activate_cs = pdata->activate_cs; - mpc83xx_spi->deactivate_cs = pdata->deactivate_cs; mpc83xx_spi->qe_mode = pdata->qe_mode; mpc83xx_spi->get_rx = mpc83xx_spi_rx_buf_u8; mpc83xx_spi->get_tx = mpc83xx_spi_tx_buf_u8; @@ -596,18 +587,13 @@ static int __init mpc83xx_spi_probe(struct platform_device *dev) init_completion(&mpc83xx_spi->done); - mpc83xx_spi->base = ioremap(r->start, r->end - r->start + 1); + mpc83xx_spi->base = ioremap(mem->start, mem->end - mem->start + 1); if (mpc83xx_spi->base == NULL) { ret = -ENOMEM; goto put_master; } - mpc83xx_spi->irq = platform_get_irq(dev, 0); - - if (mpc83xx_spi->irq < 0) { - ret = -ENXIO; - goto unmap_io; - } + mpc83xx_spi->irq = irq; /* Register for SPI Interrupt */ ret = request_irq(mpc83xx_spi->irq, mpc83xx_spi_irq, @@ -649,9 +635,9 @@ static int __init mpc83xx_spi_probe(struct platform_device *dev) printk(KERN_INFO "%s: MPC83xx SPI Controller driver at 0x%p (irq = %d)\n", - dev_name(&dev->dev), mpc83xx_spi->base, mpc83xx_spi->irq); + dev_name(dev), mpc83xx_spi->base, mpc83xx_spi->irq); - return ret; + return master; unreg_master: destroy_workqueue(mpc83xx_spi->workqueue); @@ -661,18 +647,16 @@ unmap_io: iounmap(mpc83xx_spi->base); put_master: spi_master_put(master); -free_master: - kfree(master); err: - return ret; + return ERR_PTR(ret); } -static int __exit mpc83xx_spi_remove(struct platform_device *dev) +static int __devexit mpc83xx_spi_remove(struct device *dev) { struct mpc83xx_spi *mpc83xx_spi; struct spi_master *master; - master = platform_get_drvdata(dev); + master = dev_get_drvdata(dev); mpc83xx_spi = spi_master_get_devdata(master); flush_workqueue(mpc83xx_spi->workqueue); @@ -685,23 +669,293 @@ static int __exit mpc83xx_spi_remove(struct platform_device *dev) return 0; } +struct mpc83xx_spi_probe_info { + struct fsl_spi_platform_data pdata; + int *gpios; + bool *alow_flags; +}; + +static struct mpc83xx_spi_probe_info * +to_of_pinfo(struct fsl_spi_platform_data *pdata) +{ + return container_of(pdata, struct mpc83xx_spi_probe_info, pdata); +} + +static void mpc83xx_spi_cs_control(struct spi_device *spi, bool on) +{ + struct device *dev = spi->dev.parent; + struct mpc83xx_spi_probe_info *pinfo = to_of_pinfo(dev->platform_data); + u16 cs = spi->chip_select; + int gpio = pinfo->gpios[cs]; + bool alow = pinfo->alow_flags[cs]; + + gpio_set_value(gpio, on ^ alow); +} + +static int of_mpc83xx_spi_get_chipselects(struct device *dev) +{ + struct device_node *np = dev_archdata_get_node(&dev->archdata); + struct fsl_spi_platform_data *pdata = dev->platform_data; + struct mpc83xx_spi_probe_info *pinfo = to_of_pinfo(pdata); + unsigned int ngpios; + int i = 0; + int ret; + + ngpios = of_gpio_count(np); + if (!ngpios) { + /* + * SPI w/o chip-select line. One SPI device is still permitted + * though. + */ + pdata->max_chipselect = 1; + return 0; + } + + pinfo->gpios = kmalloc(ngpios * sizeof(pinfo->gpios), GFP_KERNEL); + if (!pinfo->gpios) + return -ENOMEM; + memset(pinfo->gpios, -1, ngpios * sizeof(pinfo->gpios)); + + pinfo->alow_flags = kzalloc(ngpios * sizeof(pinfo->alow_flags), + GFP_KERNEL); + if (!pinfo->alow_flags) { + ret = -ENOMEM; + goto err_alloc_flags; + } + + for (; i < ngpios; i++) { + int gpio; + enum of_gpio_flags flags; + + gpio = of_get_gpio_flags(np, i, &flags); + if (!gpio_is_valid(gpio)) { + dev_err(dev, "invalid gpio #%d: %d\n", i, gpio); + goto err_loop; + } + + ret = gpio_request(gpio, dev_name(dev)); + if (ret) { + dev_err(dev, "can't request gpio #%d: %d\n", i, ret); + goto err_loop; + } + + pinfo->gpios[i] = gpio; + pinfo->alow_flags[i] = flags & OF_GPIO_ACTIVE_LOW; + + ret = gpio_direction_output(pinfo->gpios[i], + pinfo->alow_flags[i]); + if (ret) { + dev_err(dev, "can't set output direction for gpio " + "#%d: %d\n", i, ret); + goto err_loop; + } + } + + pdata->max_chipselect = ngpios; + pdata->cs_control = mpc83xx_spi_cs_control; + + return 0; + +err_loop: + while (i >= 0) { + if (gpio_is_valid(pinfo->gpios[i])) + gpio_free(pinfo->gpios[i]); + i--; + } + + kfree(pinfo->alow_flags); + pinfo->alow_flags = NULL; +err_alloc_flags: + kfree(pinfo->gpios); + pinfo->gpios = NULL; + return ret; +} + +static int of_mpc83xx_spi_free_chipselects(struct device *dev) +{ + struct fsl_spi_platform_data *pdata = dev->platform_data; + struct mpc83xx_spi_probe_info *pinfo = to_of_pinfo(pdata); + int i; + + if (!pinfo->gpios) + return 0; + + for (i = 0; i < pdata->max_chipselect; i++) { + if (gpio_is_valid(pinfo->gpios[i])) + gpio_free(pinfo->gpios[i]); + } + + kfree(pinfo->gpios); + kfree(pinfo->alow_flags); + return 0; +} + +static int __devinit of_mpc83xx_spi_probe(struct of_device *ofdev, + const struct of_device_id *ofid) +{ + struct device *dev = &ofdev->dev; + struct device_node *np = ofdev->node; + struct mpc83xx_spi_probe_info *pinfo; + struct fsl_spi_platform_data *pdata; + struct spi_master *master; + struct resource mem; + struct resource irq; + const void *prop; + int ret = -ENOMEM; + + pinfo = kzalloc(sizeof(*pinfo), GFP_KERNEL); + if (!pinfo) + return -ENOMEM; + + pdata = &pinfo->pdata; + dev->platform_data = pdata; + + /* Allocate bus num dynamically. */ + pdata->bus_num = -1; + + /* SPI controller is either clocked from QE or SoC clock. */ + pdata->sysclk = get_brgfreq(); + if (pdata->sysclk == -1) { + pdata->sysclk = fsl_get_sys_freq(); + if (pdata->sysclk == -1) { + ret = -ENODEV; + goto err_clk; + } + } + + prop = of_get_property(np, "mode", NULL); + if (prop && !strcmp(prop, "cpu-qe")) + pdata->qe_mode = 1; + + ret = of_mpc83xx_spi_get_chipselects(dev); + if (ret) + goto err; + + ret = of_address_to_resource(np, 0, &mem); + if (ret) + goto err; + + ret = of_irq_to_resource(np, 0, &irq); + if (!ret) { + ret = -EINVAL; + goto err; + } + + master = mpc83xx_spi_probe(dev, &mem, irq.start); + if (IS_ERR(master)) { + ret = PTR_ERR(master); + goto err; + } + + of_register_spi_devices(master, np); + + return 0; + +err: + of_mpc83xx_spi_free_chipselects(dev); +err_clk: + kfree(pinfo); + return ret; +} + +static int __devexit of_mpc83xx_spi_remove(struct of_device *ofdev) +{ + int ret; + + ret = mpc83xx_spi_remove(&ofdev->dev); + if (ret) + return ret; + of_mpc83xx_spi_free_chipselects(&ofdev->dev); + return 0; +} + +static const struct of_device_id of_mpc83xx_spi_match[] = { + { .compatible = "fsl,spi" }, + {}, +}; +MODULE_DEVICE_TABLE(of, of_mpc83xx_spi_match); + +static struct of_platform_driver of_mpc83xx_spi_driver = { + .name = "mpc83xx_spi", + .match_table = of_mpc83xx_spi_match, + .probe = of_mpc83xx_spi_probe, + .remove = __devexit_p(of_mpc83xx_spi_remove), +}; + +#ifdef CONFIG_MPC832x_RDB +/* + * XXX XXX XXX + * This is "legacy" platform driver, was used by the MPC8323E-RDB boards + * only. The driver should go away soon, since newer MPC8323E-RDB's device + * tree can work with OpenFirmware driver. But for now we support old trees + * as well. + */ +static int __devinit plat_mpc83xx_spi_probe(struct platform_device *pdev) +{ + struct resource *mem; + unsigned int irq; + struct spi_master *master; + + if (!pdev->dev.platform_data) + return -EINVAL; + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) + return -EINVAL; + + irq = platform_get_irq(pdev, 0); + if (!irq) + return -EINVAL; + + master = mpc83xx_spi_probe(&pdev->dev, mem, irq); + if (IS_ERR(master)) + return PTR_ERR(master); + return 0; +} + +static int __devexit plat_mpc83xx_spi_remove(struct platform_device *pdev) +{ + return mpc83xx_spi_remove(&pdev->dev); +} + MODULE_ALIAS("platform:mpc83xx_spi"); static struct platform_driver mpc83xx_spi_driver = { - .remove = __exit_p(mpc83xx_spi_remove), + .probe = plat_mpc83xx_spi_probe, + .remove = __exit_p(plat_mpc83xx_spi_remove), .driver = { .name = "mpc83xx_spi", .owner = THIS_MODULE, }, }; +static bool legacy_driver_failed; + +static void __init legacy_driver_register(void) +{ + legacy_driver_failed = platform_driver_register(&mpc83xx_spi_driver); +} + +static void __exit legacy_driver_unregister(void) +{ + if (legacy_driver_failed) + return; + platform_driver_unregister(&mpc83xx_spi_driver); +} +#else +static void __init legacy_driver_register(void) {} +static void __exit legacy_driver_unregister(void) {} +#endif /* CONFIG_MPC832x_RDB */ + static int __init mpc83xx_spi_init(void) { - return platform_driver_probe(&mpc83xx_spi_driver, mpc83xx_spi_probe); + legacy_driver_register(); + return of_register_platform_driver(&of_mpc83xx_spi_driver); } static void __exit mpc83xx_spi_exit(void) { - platform_driver_unregister(&mpc83xx_spi_driver); + of_unregister_platform_driver(&of_mpc83xx_spi_driver); + legacy_driver_unregister(); } module_init(mpc83xx_spi_init); diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 742a5bc44be8..2a70563bbee1 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -26,6 +26,7 @@ #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/seq_file.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/list.h> @@ -421,57 +422,52 @@ static int serial_break(struct tty_struct *tty, int break_state) return 0; } -static int serial_read_proc(char *page, char **start, off_t off, int count, - int *eof, void *data) +static int serial_proc_show(struct seq_file *m, void *v) { struct usb_serial *serial; - int length = 0; int i; - off_t begin = 0; char tmp[40]; dbg("%s", __func__); - length += sprintf(page, "usbserinfo:1.0 driver:2.0\n"); - for (i = 0; i < SERIAL_TTY_MINORS && length < PAGE_SIZE; ++i) { + seq_puts(m, "usbserinfo:1.0 driver:2.0\n"); + for (i = 0; i < SERIAL_TTY_MINORS; ++i) { serial = usb_serial_get_by_index(i); if (serial == NULL) continue; - length += sprintf(page+length, "%d:", i); + seq_printf(m, "%d:", i); if (serial->type->driver.owner) - length += sprintf(page+length, " module:%s", + seq_printf(m, " module:%s", module_name(serial->type->driver.owner)); - length += sprintf(page+length, " name:\"%s\"", + seq_printf(m, " name:\"%s\"", serial->type->description); - length += sprintf(page+length, " vendor:%04x product:%04x", + seq_printf(m, " vendor:%04x product:%04x", le16_to_cpu(serial->dev->descriptor.idVendor), le16_to_cpu(serial->dev->descriptor.idProduct)); - length += sprintf(page+length, " num_ports:%d", - serial->num_ports); - length += sprintf(page+length, " port:%d", - i - serial->minor + 1); + seq_printf(m, " num_ports:%d", serial->num_ports); + seq_printf(m, " port:%d", i - serial->minor + 1); usb_make_path(serial->dev, tmp, sizeof(tmp)); - length += sprintf(page+length, " path:%s", tmp); + seq_printf(m, " path:%s", tmp); - length += sprintf(page+length, "\n"); - if ((length + begin) > (off + count)) { - usb_serial_put(serial); - goto done; - } - if ((length + begin) < off) { - begin += length; - length = 0; - } + seq_putc(m, '\n'); usb_serial_put(serial); } - *eof = 1; -done: - if (off >= (length + begin)) - return 0; - *start = page + (off-begin); - return (count < begin+length-off) ? count : begin+length-off; + return 0; } +static int serial_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, serial_proc_show, NULL); +} + +static const struct file_operations serial_proc_fops = { + .owner = THIS_MODULE, + .open = serial_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int serial_tiocmget(struct tty_struct *tty, struct file *file) { struct usb_serial_port *port = tty->driver_data; @@ -1113,9 +1109,9 @@ static const struct tty_operations serial_ops = { .unthrottle = serial_unthrottle, .break_ctl = serial_break, .chars_in_buffer = serial_chars_in_buffer, - .read_proc = serial_read_proc, .tiocmget = serial_tiocmget, .tiocmset = serial_tiocmset, + .proc_fops = &serial_proc_fops, }; struct tty_driver *usb_serial_tty_driver; diff --git a/drivers/video/68328fb.c b/drivers/video/68328fb.c index 7f907fb23b8a..0b17824b0eb5 100644 --- a/drivers/video/68328fb.c +++ b/drivers/video/68328fb.c @@ -471,9 +471,11 @@ int __init mc68x328fb_init(void) fb_info.pseudo_palette = &mc68x328fb_pseudo_palette; fb_info.flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN; - fb_alloc_cmap(&fb_info.cmap, 256, 0); + if (fb_alloc_cmap(&fb_info.cmap, 256, 0)) + return -ENOMEM; if (register_framebuffer(&fb_info) < 0) { + fb_dealloc_cmap(&fb_info.cmap); return -EINVAL; } @@ -494,6 +496,7 @@ module_init(mc68x328fb_init); static void __exit mc68x328fb_cleanup(void) { unregister_framebuffer(&fb_info); + fb_dealloc_cmap(&fb_info.cmap); } module_exit(mc68x328fb_cleanup); diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 41c27a44bd82..ffe2f2796e29 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1597,32 +1597,8 @@ config FB_VT8623 Driver for CastleRock integrated graphics core in the VIA VT8623 [Apollo CLE266] chipset. -config FB_CYBLA - tristate "Cyberblade/i1 support" - depends on FB && PCI && X86_32 && !64BIT - select FB_CFB_IMAGEBLIT - ---help--- - This driver is supposed to support the Trident Cyberblade/i1 - graphics core integrated in the VIA VT8601A North Bridge, - also known as VIA Apollo PLE133. - - Status: - - Developed, tested and working on EPIA 5000 and EPIA 800. - - Does work reliable on all systems with CRT/LCD connected to - normal VGA ports. - - Should work on systems that do use the internal LCD port, but - this is absolutely not tested. - - Character imageblit, copyarea and rectangle fill are hw accelerated, - ypan scrolling is used by default. - - Please do read <file:Documentation/fb/cyblafb/*>. - - To compile this driver as a module, choose M here: the - module will be called cyblafb. - config FB_TRIDENT - tristate "Trident support" + tristate "Trident/CyberXXX/CyberBlade support" depends on FB && PCI select FB_CFB_FILLRECT select FB_CFB_COPYAREA @@ -1633,21 +1609,14 @@ config FB_TRIDENT and Blade XP. There are also integrated versions of these chips called CyberXXXX, CyberImage or CyberBlade. These chips are mostly found in laptops - but also on some motherboards. For more information, read - <file:Documentation/fb/tridentfb.txt> + but also on some motherboards including early VIA EPIA motherboards. + For more information, read <file:Documentation/fb/tridentfb.txt> Say Y if you have such a graphics board. To compile this driver as a module, choose M here: the module will be called tridentfb. -config FB_TRIDENT_ACCEL - bool "Trident Acceleration functions (EXPERIMENTAL)" - depends on FB_TRIDENT && EXPERIMENTAL - ---help--- - This will compile the Trident frame buffer device with - acceleration functions. - config FB_ARK tristate "ARK 2000PV support" depends on FB && PCI @@ -1920,6 +1889,30 @@ config FB_TMIO_ACCELL depends on FB_TMIO default y +config FB_S3C + tristate "Samsung S3C framebuffer support" + depends on FB && ARCH_S3C64XX + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + ---help--- + Frame buffer driver for the built-in FB controller in the Samsung + SoC line from the S3C2443 onwards, including the S3C2416, S3C2450, + and the S3C64XX series such as the S3C6400 and S3C6410. + + These chips all have the same basic framebuffer design with the + actual capabilities depending on the chip. For instance the S3C6400 + and S3C6410 support 4 hardware windows whereas the S3C24XX series + currently only have two. + + Currently the support is only for the S3C6400 and S3C6410 SoCs. + +config FB_S3C_DEBUG_REGWRITE + bool "Debug register writes" + depends on FB_S3C + ---help--- + Show all register writes via printk(KERN_DEBUG) + config FB_S3C2410 tristate "S3C2410 LCD framebuffer support" depends on FB && ARCH_S3C2410 diff --git a/drivers/video/Makefile b/drivers/video/Makefile index bb265eca7d57..0dbd6c68d76b 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -76,6 +76,7 @@ obj-$(CONFIG_FB_ATARI) += atafb.o c2p_iplan2.o atafb_mfb.o \ atafb_iplan2p2.o atafb_iplan2p4.o atafb_iplan2p8.o obj-$(CONFIG_FB_MAC) += macfb.o obj-$(CONFIG_FB_HECUBA) += hecubafb.o +obj-$(CONFIG_FB_N411) += n411.o obj-$(CONFIG_FB_HGA) += hgafb.o obj-$(CONFIG_FB_XVR500) += sunxvr500.o obj-$(CONFIG_FB_XVR2500) += sunxvr2500.o @@ -110,6 +111,7 @@ obj-$(CONFIG_FB_BROADSHEET) += broadsheetfb.o obj-$(CONFIG_FB_S1D13XXX) += s1d13xxxfb.o obj-$(CONFIG_FB_SH7760) += sh7760fb.o obj-$(CONFIG_FB_IMX) += imxfb.o +obj-$(CONFIG_FB_S3C) += s3c-fb.o obj-$(CONFIG_FB_S3C2410) += s3c2410fb.o obj-$(CONFIG_FB_FSL_DIU) += fsl-diu-fb.o obj-$(CONFIG_FB_COBALT) += cobalt_lcdfb.o diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index 4e046fed1380..61050ab14128 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -408,7 +408,9 @@ static int clcdfb_register(struct clcd_fb *fb) /* * Allocate colourmap. */ - fb_alloc_cmap(&fb->fb.cmap, 256, 0); + ret = fb_alloc_cmap(&fb->fb.cmap, 256, 0); + if (ret) + goto unmap; /* * Ensure interrupts are disabled. @@ -426,6 +428,8 @@ static int clcdfb_register(struct clcd_fb *fb) printk(KERN_ERR "CLCD: cannot register framebuffer (%d)\n", ret); + fb_dealloc_cmap(&fb->fb.cmap); + unmap: iounmap(fb->regs); free_clk: clk_put(fb->clk); @@ -485,6 +489,8 @@ static int clcdfb_remove(struct amba_device *dev) clcdfb_disable(fb); unregister_framebuffer(&fb->fb); + if (fb->fb.cmap.len) + fb_dealloc_cmap(&fb->fb.cmap); iounmap(fb->regs); clk_put(fb->clk); diff --git a/drivers/video/amifb.c b/drivers/video/amifb.c index 100f23661465..82bedd7f7789 100644 --- a/drivers/video/amifb.c +++ b/drivers/video/amifb.c @@ -2437,7 +2437,9 @@ default_chipset: goto amifb_error; } - fb_alloc_cmap(&fb_info.cmap, 1<<fb_info.var.bits_per_pixel, 0); + err = fb_alloc_cmap(&fb_info.cmap, 1<<fb_info.var.bits_per_pixel, 0); + if (err) + goto amifb_error; if (register_framebuffer(&fb_info) < 0) { err = -EINVAL; @@ -2456,7 +2458,8 @@ amifb_error: static void amifb_deinit(void) { - fb_dealloc_cmap(&fb_info.cmap); + if (fb_info.cmap.len) + fb_dealloc_cmap(&fb_info.cmap); chipfree(); if (videomemory) iounmap((void*)videomemory); diff --git a/drivers/video/arkfb.c b/drivers/video/arkfb.c index 314d18694b6a..d583bea608fd 100644 --- a/drivers/video/arkfb.c +++ b/drivers/video/arkfb.c @@ -470,7 +470,7 @@ static void ark_dac_read_regs(void *data, u8 *code, int count) while (count != 0) { - vga_wseq(NULL, 0x1C, regval | (code[0] & 4) ? 0x80 : 0); + vga_wseq(NULL, 0x1C, regval | (code[0] & 4 ? 0x80 : 0)); code[1] = vga_r(NULL, dac_regs[code[0] & 3]); count--; code += 2; @@ -485,7 +485,7 @@ static void ark_dac_write_regs(void *data, u8 *code, int count) while (count != 0) { - vga_wseq(NULL, 0x1C, regval | (code[0] & 4) ? 0x80 : 0); + vga_wseq(NULL, 0x1C, regval | (code[0] & 4 ? 0x80 : 0)); vga_w(NULL, dac_regs[code[0] & 3], code[1]); count--; code += 2; diff --git a/drivers/video/asiliantfb.c b/drivers/video/asiliantfb.c index 1fd22f460b0f..1a1f946d8fef 100644 --- a/drivers/video/asiliantfb.c +++ b/drivers/video/asiliantfb.c @@ -505,19 +505,27 @@ static struct fb_var_screeninfo asiliantfb_var __devinitdata = { .vsync_len = 2, }; -static void __devinit init_asiliant(struct fb_info *p, unsigned long addr) +static int __devinit init_asiliant(struct fb_info *p, unsigned long addr) { + int err; + p->fix = asiliantfb_fix; p->fix.smem_start = addr; p->var = asiliantfb_var; p->fbops = &asiliantfb_ops; p->flags = FBINFO_DEFAULT; - fb_alloc_cmap(&p->cmap, 256, 0); + err = fb_alloc_cmap(&p->cmap, 256, 0); + if (err) { + printk(KERN_ERR "C&T 69000 fb failed to alloc cmap memory\n"); + return err; + } - if (register_framebuffer(p) < 0) { + err = register_framebuffer(p); + if (err < 0) { printk(KERN_ERR "C&T 69000 framebuffer failed to register\n"); - return; + fb_dealloc_cmap(&p->cmap); + return err; } printk(KERN_INFO "fb%d: Asiliant 69000 frame buffer (%dK RAM detected)\n", @@ -532,6 +540,7 @@ asiliantfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent) { unsigned long addr, size; struct fb_info *p; + int err; if ((dp->resource[0].flags & IORESOURCE_MEM) == 0) return -ENODEV; @@ -560,7 +569,13 @@ asiliantfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent) pci_write_config_dword(dp, 4, 0x02800083); writeb(3, p->screen_base + 0x400784); - init_asiliant(p, addr); + err = init_asiliant(p, addr); + if (err) { + iounmap(p->screen_base); + release_mem_region(addr, size); + framebuffer_release(p); + return err; + } pci_set_drvdata(dp, p); return 0; @@ -571,6 +586,7 @@ static void __devexit asiliantfb_remove(struct pci_dev *dp) struct fb_info *p = pci_get_drvdata(dp); unregister_framebuffer(p); + fb_dealloc_cmap(&p->cmap); iounmap(p->screen_base); release_mem_region(pci_resource_start(dp, 0), pci_resource_len(dp, 0)); pci_set_drvdata(dp, NULL); diff --git a/drivers/video/aty/mach64_accel.c b/drivers/video/aty/mach64_accel.c index a8f60c33863c..0cc9724e61a2 100644 --- a/drivers/video/aty/mach64_accel.c +++ b/drivers/video/aty/mach64_accel.c @@ -39,7 +39,8 @@ void aty_reset_engine(const struct atyfb_par *par) { /* reset engine */ aty_st_le32(GEN_TEST_CNTL, - aty_ld_le32(GEN_TEST_CNTL, par) & ~GUI_ENGINE_ENABLE, par); + aty_ld_le32(GEN_TEST_CNTL, par) & + ~(GUI_ENGINE_ENABLE | HWCURSOR_ENABLE), par); /* enable engine */ aty_st_le32(GEN_TEST_CNTL, aty_ld_le32(GEN_TEST_CNTL, par) | GUI_ENGINE_ENABLE, par); diff --git a/drivers/video/aty/mach64_cursor.c b/drivers/video/aty/mach64_cursor.c index faf95da8fcbc..04c710804bb0 100644 --- a/drivers/video/aty/mach64_cursor.c +++ b/drivers/video/aty/mach64_cursor.c @@ -77,9 +77,13 @@ static int atyfb_cursor(struct fb_info *info, struct fb_cursor *cursor) if (par->asleep) return -EPERM; - /* Hide cursor */ wait_for_fifo(1, par); - aty_st_le32(GEN_TEST_CNTL, aty_ld_le32(GEN_TEST_CNTL, par) & ~HWCURSOR_ENABLE, par); + if (cursor->enable) + aty_st_le32(GEN_TEST_CNTL, aty_ld_le32(GEN_TEST_CNTL, par) + | HWCURSOR_ENABLE, par); + else + aty_st_le32(GEN_TEST_CNTL, aty_ld_le32(GEN_TEST_CNTL, par) + & ~HWCURSOR_ENABLE, par); /* set position */ if (cursor->set & FB_CUR_SETPOS) { @@ -109,7 +113,7 @@ static int atyfb_cursor(struct fb_info *info, struct fb_cursor *cursor) y<<=1; h<<=1; } - wait_for_fifo(4, par); + wait_for_fifo(3, par); aty_st_le32(CUR_OFFSET, (info->fix.smem_len >> 3) + (yoff << 1), par); aty_st_le32(CUR_HORZ_VERT_OFF, ((u32) (64 - h + yoff) << 16) | xoff, par); @@ -177,11 +181,6 @@ static int atyfb_cursor(struct fb_info *info, struct fb_cursor *cursor) } } - if (cursor->enable) { - wait_for_fifo(1, par); - aty_st_le32(GEN_TEST_CNTL, aty_ld_le32(GEN_TEST_CNTL, par) - | HWCURSOR_ENABLE, par); - } return 0; } diff --git a/drivers/video/aty/radeon_pm.c b/drivers/video/aty/radeon_pm.c index 1de0c0032468..97a1f095f327 100644 --- a/drivers/video/aty/radeon_pm.c +++ b/drivers/video/aty/radeon_pm.c @@ -89,6 +89,9 @@ static struct radeon_device_id radeon_workaround_list[] = { BUGFIX("Acer Aspire 2010", PCI_VENDOR_ID_AI, 0x0061, radeon_pm_off, radeon_reinitialize_M10), + BUGFIX("Acer Travelmate 290D/292LMi", + PCI_VENDOR_ID_AI, 0x005a, + radeon_pm_off, radeon_reinitialize_M10), { .ident = NULL } }; diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index 157057c79ca3..dd37cbcaf8ce 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -35,6 +35,8 @@ static int fb_notifier_callback(struct notifier_block *self, return 0; bd = container_of(self, struct backlight_device, fb_notif); + if (!lock_fb_info(evdata->info)) + return -ENODEV; mutex_lock(&bd->ops_lock); if (bd->ops) if (!bd->ops->check_fb || @@ -47,6 +49,7 @@ static int fb_notifier_callback(struct notifier_block *self, backlight_update_status(bd); } mutex_unlock(&bd->ops_lock); + unlock_fb_info(evdata->info); return 0; } diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c index b6449470106c..0bb13df0fa89 100644 --- a/drivers/video/backlight/lcd.c +++ b/drivers/video/backlight/lcd.c @@ -40,6 +40,8 @@ static int fb_notifier_callback(struct notifier_block *self, if (!ld->ops) return 0; + if (!lock_fb_info(evdata->info)) + return -ENODEV; mutex_lock(&ld->ops_lock); if (!ld->ops->check_fb || ld->ops->check_fb(ld, evdata->info)) { if (event == FB_EVENT_BLANK) { @@ -51,6 +53,7 @@ static int fb_notifier_callback(struct notifier_block *self, } } mutex_unlock(&ld->ops_lock); + unlock_fb_info(evdata->info); return 0; } diff --git a/drivers/video/cirrusfb.c b/drivers/video/cirrusfb.c index a2aa6ddffbe2..d42e385f091c 100644 --- a/drivers/video/cirrusfb.c +++ b/drivers/video/cirrusfb.c @@ -34,8 +34,6 @@ * */ -#define CIRRUSFB_VERSION "2.0-pre2" - #include <linux/module.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -72,20 +70,9 @@ * */ -/* enable debug output? */ -/* #define CIRRUSFB_DEBUG 1 */ - /* disable runtime assertions? */ /* #define CIRRUSFB_NDEBUG */ -/* debug output */ -#ifdef CIRRUSFB_DEBUG -#define DPRINTK(fmt, args...) \ - printk(KERN_DEBUG "%s: " fmt, __func__ , ## args) -#else -#define DPRINTK(fmt, args...) -#endif - /* debugging assertions */ #ifndef CIRRUSFB_NDEBUG #define assert(expr) \ @@ -108,14 +95,15 @@ /* board types */ enum cirrus_board { BT_NONE = 0, - BT_SD64, - BT_PICCOLO, - BT_PICASSO, - BT_SPECTRUM, + BT_SD64, /* GD5434 */ + BT_PICCOLO, /* GD5426 */ + BT_PICASSO, /* GD5426 or GD5428 */ + BT_SPECTRUM, /* GD5426 or GD5428 */ BT_PICASSO4, /* GD5446 */ BT_ALPINE, /* GD543x/4x */ BT_GD5480, - BT_LAGUNA, /* GD546x */ + BT_LAGUNA, /* GD5462/64 */ + BT_LAGUNAB, /* GD5465 */ }; /* @@ -150,15 +138,17 @@ static const struct cirrusfb_board_info_rec { .maxclock = { /* guess */ /* the SD64/P4 have a higher max. videoclock */ - 140000, 140000, 140000, 140000, 140000, + 135100, 135100, 85500, 85500, 0 }, .init_sr07 = true, .init_sr1f = true, .scrn_start_bit19 = true, .sr07 = 0xF0, .sr07_1bpp = 0xF0, + .sr07_1bpp_mux = 0xF6, .sr07_8bpp = 0xF1, - .sr1f = 0x20 + .sr07_8bpp_mux = 0xF7, + .sr1f = 0x1E }, [BT_PICCOLO] = { .name = "CL Piccolo", @@ -210,9 +200,11 @@ static const struct cirrusfb_board_info_rec { .init_sr07 = true, .init_sr1f = false, .scrn_start_bit19 = true, - .sr07 = 0x20, - .sr07_1bpp = 0x20, - .sr07_8bpp = 0x21, + .sr07 = 0xA0, + .sr07_1bpp = 0xA0, + .sr07_1bpp_mux = 0xA6, + .sr07_8bpp = 0xA1, + .sr07_8bpp_mux = 0xA7, .sr1f = 0 }, [BT_ALPINE] = { @@ -225,8 +217,8 @@ static const struct cirrusfb_board_info_rec { .init_sr1f = true, .scrn_start_bit19 = true, .sr07 = 0xA0, - .sr07_1bpp = 0xA1, - .sr07_1bpp_mux = 0xA7, + .sr07_1bpp = 0xA0, + .sr07_1bpp_mux = 0xA6, .sr07_8bpp = 0xA1, .sr07_8bpp_mux = 0xA7, .sr1f = 0x1C @@ -247,8 +239,18 @@ static const struct cirrusfb_board_info_rec { [BT_LAGUNA] = { .name = "CL Laguna", .maxclock = { - /* guess */ - 135100, 135100, 135100, 135100, 135100, + /* taken from X11 code */ + 170000, 170000, 170000, 170000, 135100, + }, + .init_sr07 = false, + .init_sr1f = false, + .scrn_start_bit19 = true, + }, + [BT_LAGUNAB] = { + .name = "CL Laguna AGP", + .maxclock = { + /* taken from X11 code */ + 170000, 250000, 170000, 170000, 135100, }, .init_sr07 = false, .init_sr1f = false, @@ -262,8 +264,8 @@ static const struct cirrusfb_board_info_rec { static struct pci_device_id cirrusfb_pci_table[] = { CHIP(PCI_DEVICE_ID_CIRRUS_5436, BT_ALPINE), - CHIP(PCI_DEVICE_ID_CIRRUS_5434_8, BT_ALPINE), - CHIP(PCI_DEVICE_ID_CIRRUS_5434_4, BT_ALPINE), + CHIP(PCI_DEVICE_ID_CIRRUS_5434_8, BT_SD64), + CHIP(PCI_DEVICE_ID_CIRRUS_5434_4, BT_SD64), CHIP(PCI_DEVICE_ID_CIRRUS_5430, BT_ALPINE), /* GD-5440 is same id */ CHIP(PCI_DEVICE_ID_CIRRUS_7543, BT_ALPINE), CHIP(PCI_DEVICE_ID_CIRRUS_7548, BT_ALPINE), @@ -271,7 +273,7 @@ static struct pci_device_id cirrusfb_pci_table[] = { CHIP(PCI_DEVICE_ID_CIRRUS_5446, BT_PICASSO4), /* Picasso 4 is 5446 */ CHIP(PCI_DEVICE_ID_CIRRUS_5462, BT_LAGUNA), /* CL Laguna */ CHIP(PCI_DEVICE_ID_CIRRUS_5464, BT_LAGUNA), /* CL Laguna 3D */ - CHIP(PCI_DEVICE_ID_CIRRUS_5465, BT_LAGUNA), /* CL Laguna 3DA*/ + CHIP(PCI_DEVICE_ID_CIRRUS_5465, BT_LAGUNAB), /* CL Laguna 3DA*/ { 0, } }; MODULE_DEVICE_TABLE(pci, cirrusfb_pci_table); @@ -326,10 +328,6 @@ static const struct { }; #endif /* CONFIG_ZORRO */ -struct cirrusfb_regs { - int multiplexing; -}; - #ifdef CIRRUSFB_DEBUG enum cirrusfb_dbg_reg_class { CRT, @@ -340,10 +338,12 @@ enum cirrusfb_dbg_reg_class { /* info about board */ struct cirrusfb_info { u8 __iomem *regbase; + u8 __iomem *laguna_mmio; enum cirrus_board btype; unsigned char SFR; /* Shadow of special function register */ - struct cirrusfb_regs currentmode; + int multiplexing; + int doubleVCLK; int blank_mode; u32 pseudo_palette[16]; @@ -357,43 +357,8 @@ static char *mode_option __devinitdata = "640x480@60"; /**** BEGIN PROTOTYPES ******************************************************/ /*--- Interface used by the world ------------------------------------------*/ -static int cirrusfb_init(void); -#ifndef MODULE -static int cirrusfb_setup(char *options); -#endif - -static int cirrusfb_open(struct fb_info *info, int user); -static int cirrusfb_release(struct fb_info *info, int user); -static int cirrusfb_setcolreg(unsigned regno, unsigned red, unsigned green, - unsigned blue, unsigned transp, - struct fb_info *info); -static int cirrusfb_check_var(struct fb_var_screeninfo *var, - struct fb_info *info); -static int cirrusfb_set_par(struct fb_info *info); static int cirrusfb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info); -static int cirrusfb_blank(int blank_mode, struct fb_info *info); -static void cirrusfb_fillrect(struct fb_info *info, - const struct fb_fillrect *region); -static void cirrusfb_copyarea(struct fb_info *info, - const struct fb_copyarea *area); -static void cirrusfb_imageblit(struct fb_info *info, - const struct fb_image *image); - -/* function table of the above functions */ -static struct fb_ops cirrusfb_ops = { - .owner = THIS_MODULE, - .fb_open = cirrusfb_open, - .fb_release = cirrusfb_release, - .fb_setcolreg = cirrusfb_setcolreg, - .fb_check_var = cirrusfb_check_var, - .fb_set_par = cirrusfb_set_par, - .fb_pan_display = cirrusfb_pan_display, - .fb_blank = cirrusfb_blank, - .fb_fillrect = cirrusfb_fillrect, - .fb_copyarea = cirrusfb_copyarea, - .fb_imageblit = cirrusfb_imageblit, -}; /*--- Internal routines ----------------------------------------------------*/ static void init_vgachip(struct fb_info *info); @@ -421,22 +386,27 @@ static void cirrusfb_BitBLT(u8 __iomem *regbase, int bits_per_pixel, static void cirrusfb_RectFill(u8 __iomem *regbase, int bits_per_pixel, u_short x, u_short y, u_short width, u_short height, - u_char color, u_short line_length); + u32 fg_color, u32 bg_color, + u_short line_length, u_char blitmode); static void bestclock(long freq, int *nom, int *den, int *div); #ifdef CIRRUSFB_DEBUG -static void cirrusfb_dump(void); -static void cirrusfb_dbg_reg_dump(caddr_t regbase); -static void cirrusfb_dbg_print_regs(caddr_t regbase, +static void cirrusfb_dbg_reg_dump(struct fb_info *info, caddr_t regbase); +static void cirrusfb_dbg_print_regs(struct fb_info *info, + caddr_t regbase, enum cirrusfb_dbg_reg_class reg_class, ...); -static void cirrusfb_dbg_print_byte(const char *name, unsigned char val); #endif /* CIRRUSFB_DEBUG */ /*** END PROTOTYPES ********************************************************/ /*****************************************************************************/ /*** BEGIN Interface Used by the World ***************************************/ +static inline int is_laguna(const struct cirrusfb_info *cinfo) +{ + return cinfo->btype == BT_LAGUNA || cinfo->btype == BT_LAGUNAB; +} + static int opencount; /*--- Open /dev/fbx ---------------------------------------------------------*/ @@ -460,85 +430,94 @@ static int cirrusfb_release(struct fb_info *info, int user) /**** BEGIN Hardware specific Routines **************************************/ /* Check if the MCLK is not a better clock source */ -static int cirrusfb_check_mclk(struct cirrusfb_info *cinfo, long freq) +static int cirrusfb_check_mclk(struct fb_info *info, long freq) { + struct cirrusfb_info *cinfo = info->par; long mclk = vga_rseq(cinfo->regbase, CL_SEQR1F) & 0x3f; /* Read MCLK value */ mclk = (14318 * mclk) >> 3; - DPRINTK("Read MCLK of %ld kHz\n", mclk); + dev_dbg(info->device, "Read MCLK of %ld kHz\n", mclk); /* Determine if we should use MCLK instead of VCLK, and if so, what we * should divide it by to get VCLK */ if (abs(freq - mclk) < 250) { - DPRINTK("Using VCLK = MCLK\n"); + dev_dbg(info->device, "Using VCLK = MCLK\n"); return 1; } else if (abs(freq - (mclk / 2)) < 250) { - DPRINTK("Using VCLK = MCLK/2\n"); + dev_dbg(info->device, "Using VCLK = MCLK/2\n"); return 2; } return 0; } -static int cirrusfb_check_var(struct fb_var_screeninfo *var, - struct fb_info *info) +static int cirrusfb_check_pixclock(const struct fb_var_screeninfo *var, + struct fb_info *info) { - int yres; - /* memory size in pixels */ - unsigned pixels = info->screen_size * 8 / var->bits_per_pixel; + long freq; + long maxclock; + struct cirrusfb_info *cinfo = info->par; + unsigned maxclockidx = var->bits_per_pixel >> 3; - switch (var->bits_per_pixel) { - case 1: - pixels /= 4; - break; /* 8 pixel per byte, only 1/4th of mem usable */ - case 8: - case 16: - case 32: - break; /* 1 pixel == 1 byte */ - default: - printk(KERN_ERR "cirrusfb: mode %dx%dx%d rejected..." - "color depth not supported.\n", - var->xres, var->yres, var->bits_per_pixel); - DPRINTK("EXIT - EINVAL error\n"); - return -EINVAL; - } + /* convert from ps to kHz */ + freq = PICOS2KHZ(var->pixclock); - if (var->xres_virtual < var->xres) - var->xres_virtual = var->xres; - /* use highest possible virtual resolution */ - if (var->yres_virtual == -1) { - var->yres_virtual = pixels / var->xres_virtual; + dev_dbg(info->device, "desired pixclock: %ld kHz\n", freq); - printk(KERN_INFO "cirrusfb: virtual resolution set to " - "maximum of %dx%d\n", var->xres_virtual, - var->yres_virtual); - } - if (var->yres_virtual < var->yres) - var->yres_virtual = var->yres; + maxclock = cirrusfb_board_info[cinfo->btype].maxclock[maxclockidx]; + cinfo->multiplexing = 0; - if (var->xres_virtual * var->yres_virtual > pixels) { - printk(KERN_ERR "cirrusfb: mode %dx%dx%d rejected... " - "virtual resolution too high to fit into video memory!\n", - var->xres_virtual, var->yres_virtual, - var->bits_per_pixel); - DPRINTK("EXIT - EINVAL error\n"); + /* If the frequency is greater than we can support, we might be able + * to use multiplexing for the video mode */ + if (freq > maxclock) { + dev_err(info->device, + "Frequency greater than maxclock (%ld kHz)\n", + maxclock); return -EINVAL; } + /* + * Additional constraint: 8bpp uses DAC clock doubling to allow maximum + * pixel clock + */ + if (var->bits_per_pixel == 8) { + switch (cinfo->btype) { + case BT_ALPINE: + case BT_SD64: + case BT_PICASSO4: + if (freq > 85500) + cinfo->multiplexing = 1; + break; + case BT_GD5480: + if (freq > 135100) + cinfo->multiplexing = 1; + break; + default: + break; + } + } - if (var->xoffset < 0) - var->xoffset = 0; - if (var->yoffset < 0) - var->yoffset = 0; + /* If we have a 1MB 5434, we need to put ourselves in a mode where + * the VCLK is double the pixel clock. */ + cinfo->doubleVCLK = 0; + if (cinfo->btype == BT_SD64 && info->fix.smem_len <= MB_ && + var->bits_per_pixel == 16) { + cinfo->doubleVCLK = 1; + } - /* truncate xoffset and yoffset to maximum if too high */ - if (var->xoffset > var->xres_virtual - var->xres) - var->xoffset = var->xres_virtual - var->xres - 1; - if (var->yoffset > var->yres_virtual - var->yres) - var->yoffset = var->yres_virtual - var->yres - 1; + return 0; +} + +static int cirrusfb_check_var(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + int yres; + /* memory size in pixels */ + unsigned pixels = info->screen_size * 8 / var->bits_per_pixel; + struct cirrusfb_info *cinfo = info->par; switch (var->bits_per_pixel) { case 1: @@ -550,7 +529,7 @@ static int cirrusfb_check_var(struct fb_var_screeninfo *var, case 8: var->red.offset = 0; - var->red.length = 6; + var->red.length = 8; var->green = var->red; var->blue = var->red; break; @@ -561,20 +540,20 @@ static int cirrusfb_check_var(struct fb_var_screeninfo *var, var->green.offset = -3; var->blue.offset = 8; } else { - var->red.offset = 10; + var->red.offset = 11; var->green.offset = 5; var->blue.offset = 0; } var->red.length = 5; - var->green.length = 5; + var->green.length = 6; var->blue.length = 5; break; - case 32: + case 24: if (isPReP) { - var->red.offset = 8; - var->green.offset = 16; - var->blue.offset = 24; + var->red.offset = 0; + var->green.offset = 8; + var->blue.offset = 16; } else { var->red.offset = 16; var->green.offset = 8; @@ -586,12 +565,45 @@ static int cirrusfb_check_var(struct fb_var_screeninfo *var, break; default: - DPRINTK("Unsupported bpp size: %d\n", var->bits_per_pixel); + dev_dbg(info->device, + "Unsupported bpp size: %d\n", var->bits_per_pixel); assert(false); /* should never occur */ break; } + if (var->xres_virtual < var->xres) + var->xres_virtual = var->xres; + /* use highest possible virtual resolution */ + if (var->yres_virtual == -1) { + var->yres_virtual = pixels / var->xres_virtual; + + dev_info(info->device, + "virtual resolution set to maximum of %dx%d\n", + var->xres_virtual, var->yres_virtual); + } + if (var->yres_virtual < var->yres) + var->yres_virtual = var->yres; + + if (var->xres_virtual * var->yres_virtual > pixels) { + dev_err(info->device, "mode %dx%dx%d rejected... " + "virtual resolution too high to fit into video memory!\n", + var->xres_virtual, var->yres_virtual, + var->bits_per_pixel); + return -EINVAL; + } + + if (var->xoffset < 0) + var->xoffset = 0; + if (var->yoffset < 0) + var->yoffset = 0; + + /* truncate xoffset and yoffset to maximum if too high */ + if (var->xoffset > var->xres_virtual - var->xres) + var->xoffset = var->xres_virtual - var->xres - 1; + if (var->yoffset > var->yres_virtual - var->yres) + var->yoffset = var->yres_virtual - var->yres - 1; + var->red.msb_right = var->green.msb_right = var->blue.msb_right = @@ -606,99 +618,31 @@ static int cirrusfb_check_var(struct fb_var_screeninfo *var, yres = (yres + 1) / 2; if (yres >= 1280) { - printk(KERN_ERR "cirrusfb: ERROR: VerticalTotal >= 1280; " + dev_err(info->device, "ERROR: VerticalTotal >= 1280; " "special treatment required! (TODO)\n"); - DPRINTK("EXIT - EINVAL error\n"); return -EINVAL; } - return 0; -} - -static int cirrusfb_decode_var(const struct fb_var_screeninfo *var, - struct cirrusfb_regs *regs, - struct fb_info *info) -{ - long freq; - long maxclock; - int maxclockidx = var->bits_per_pixel >> 3; - struct cirrusfb_info *cinfo = info->par; - - switch (var->bits_per_pixel) { - case 1: - info->fix.line_length = var->xres_virtual / 8; - info->fix.visual = FB_VISUAL_MONO10; - break; - - case 8: - info->fix.line_length = var->xres_virtual; - info->fix.visual = FB_VISUAL_PSEUDOCOLOR; - break; - - case 16: - case 32: - info->fix.line_length = var->xres_virtual * maxclockidx; - info->fix.visual = FB_VISUAL_TRUECOLOR; - break; - - default: - DPRINTK("Unsupported bpp size: %d\n", var->bits_per_pixel); - assert(false); - /* should never occur */ - break; - } - - info->fix.type = FB_TYPE_PACKED_PIXELS; - - /* convert from ps to kHz */ - freq = PICOS2KHZ(var->pixclock); - - DPRINTK("desired pixclock: %ld kHz\n", freq); - - maxclock = cirrusfb_board_info[cinfo->btype].maxclock[maxclockidx]; - regs->multiplexing = 0; + if (cirrusfb_check_pixclock(var, info)) + return -EINVAL; - /* If the frequency is greater than we can support, we might be able - * to use multiplexing for the video mode */ - if (freq > maxclock) { - switch (cinfo->btype) { - case BT_ALPINE: - case BT_GD5480: - regs->multiplexing = 1; - break; + if (!is_laguna(cinfo)) + var->accel_flags = FB_ACCELF_TEXT; - default: - printk(KERN_ERR "cirrusfb: Frequency greater " - "than maxclock (%ld kHz)\n", maxclock); - DPRINTK("EXIT - return -EINVAL\n"); - return -EINVAL; - } - } -#if 0 - /* TODO: If we have a 1MB 5434, we need to put ourselves in a mode where - * the VCLK is double the pixel clock. */ - switch (var->bits_per_pixel) { - case 16: - case 32: - if (var->xres <= 800) - /* Xbh has this type of clock for 32-bit */ - freq /= 2; - break; - } -#endif return 0; } -static void cirrusfb_set_mclk_as_source(const struct cirrusfb_info *cinfo, - int div) +static void cirrusfb_set_mclk_as_source(const struct fb_info *info, int div) { + struct cirrusfb_info *cinfo = info->par; unsigned char old1f, old1e; + assert(cinfo != NULL); old1f = vga_rseq(cinfo->regbase, CL_SEQR1F) & ~0x40; if (div) { - DPRINTK("Set %s as pixclock source.\n", - (div == 2) ? "MCLK/2" : "MCLK"); + dev_dbg(info->device, "Set %s as pixclock source.\n", + (div == 2) ? "MCLK/2" : "MCLK"); old1f |= 0x40; old1e = vga_rseq(cinfo->regbase, CL_SEQR1E) & ~0x1; if (div == 2) @@ -718,101 +662,119 @@ static int cirrusfb_set_par_foo(struct fb_info *info) { struct cirrusfb_info *cinfo = info->par; struct fb_var_screeninfo *var = &info->var; - struct cirrusfb_regs regs; u8 __iomem *regbase = cinfo->regbase; unsigned char tmp; - int offset = 0, err; + int pitch; const struct cirrusfb_board_info_rec *bi; int hdispend, hsyncstart, hsyncend, htotal; int yres, vdispend, vsyncstart, vsyncend, vtotal; long freq; int nom, den, div; + unsigned int control = 0, format = 0, threshold = 0; - DPRINTK("ENTER\n"); - DPRINTK("Requested mode: %dx%dx%d\n", + dev_dbg(info->device, "Requested mode: %dx%dx%d\n", var->xres, var->yres, var->bits_per_pixel); - DPRINTK("pixclock: %d\n", var->pixclock); - init_vgachip(info); + switch (var->bits_per_pixel) { + case 1: + info->fix.line_length = var->xres_virtual / 8; + info->fix.visual = FB_VISUAL_MONO10; + break; - err = cirrusfb_decode_var(var, ®s, info); - if (err) { - /* should never happen */ - DPRINTK("mode change aborted. invalid var.\n"); - return -EINVAL; + case 8: + info->fix.line_length = var->xres_virtual; + info->fix.visual = FB_VISUAL_PSEUDOCOLOR; + break; + + case 16: + case 24: + info->fix.line_length = var->xres_virtual * + var->bits_per_pixel >> 3; + info->fix.visual = FB_VISUAL_TRUECOLOR; + break; } + info->fix.type = FB_TYPE_PACKED_PIXELS; + + init_vgachip(info); bi = &cirrusfb_board_info[cinfo->btype]; hsyncstart = var->xres + var->right_margin; hsyncend = hsyncstart + var->hsync_len; - htotal = (hsyncend + var->left_margin) / 8 - 5; - hdispend = var->xres / 8 - 1; - hsyncstart = hsyncstart / 8 + 1; - hsyncend = hsyncend / 8 + 1; + htotal = (hsyncend + var->left_margin) / 8; + hdispend = var->xres / 8; + hsyncstart = hsyncstart / 8; + hsyncend = hsyncend / 8; - yres = var->yres; - vsyncstart = yres + var->lower_margin; + vdispend = var->yres; + vsyncstart = vdispend + var->lower_margin; vsyncend = vsyncstart + var->vsync_len; vtotal = vsyncend + var->upper_margin; - vdispend = yres - 1; if (var->vmode & FB_VMODE_DOUBLE) { - yres *= 2; + vdispend *= 2; vsyncstart *= 2; vsyncend *= 2; vtotal *= 2; } else if (var->vmode & FB_VMODE_INTERLACED) { - yres = (yres + 1) / 2; + vdispend = (vdispend + 1) / 2; vsyncstart = (vsyncstart + 1) / 2; vsyncend = (vsyncend + 1) / 2; vtotal = (vtotal + 1) / 2; } - - vtotal -= 2; - vsyncstart -= 1; - vsyncend -= 1; - + yres = vdispend; if (yres >= 1024) { vtotal /= 2; vsyncstart /= 2; vsyncend /= 2; vdispend /= 2; } - if (regs.multiplexing) { + + vdispend -= 1; + vsyncstart -= 1; + vsyncend -= 1; + vtotal -= 2; + + if (cinfo->multiplexing) { htotal /= 2; hsyncstart /= 2; hsyncend /= 2; hdispend /= 2; } + + htotal -= 5; + hdispend -= 1; + hsyncstart += 1; + hsyncend += 1; + /* unlock register VGA_CRTC_H_TOTAL..CRT7 */ vga_wcrt(regbase, VGA_CRTC_V_SYNC_END, 0x20); /* previously: 0x00) */ /* if debugging is enabled, all parameters get output before writing */ - DPRINTK("CRT0: %d\n", htotal); + dev_dbg(info->device, "CRT0: %d\n", htotal); vga_wcrt(regbase, VGA_CRTC_H_TOTAL, htotal); - DPRINTK("CRT1: %d\n", hdispend); + dev_dbg(info->device, "CRT1: %d\n", hdispend); vga_wcrt(regbase, VGA_CRTC_H_DISP, hdispend); - DPRINTK("CRT2: %d\n", var->xres / 8); + dev_dbg(info->device, "CRT2: %d\n", var->xres / 8); vga_wcrt(regbase, VGA_CRTC_H_BLANK_START, var->xres / 8); /* + 128: Compatible read */ - DPRINTK("CRT3: 128+%d\n", (htotal + 5) % 32); + dev_dbg(info->device, "CRT3: 128+%d\n", (htotal + 5) % 32); vga_wcrt(regbase, VGA_CRTC_H_BLANK_END, 128 + ((htotal + 5) % 32)); - DPRINTK("CRT4: %d\n", hsyncstart); + dev_dbg(info->device, "CRT4: %d\n", hsyncstart); vga_wcrt(regbase, VGA_CRTC_H_SYNC_START, hsyncstart); tmp = hsyncend % 32; if ((htotal + 5) & 32) tmp += 128; - DPRINTK("CRT5: %d\n", tmp); + dev_dbg(info->device, "CRT5: %d\n", tmp); vga_wcrt(regbase, VGA_CRTC_H_SYNC_END, tmp); - DPRINTK("CRT6: %d\n", vtotal & 0xff); + dev_dbg(info->device, "CRT6: %d\n", vtotal & 0xff); vga_wcrt(regbase, VGA_CRTC_V_TOTAL, vtotal & 0xff); tmp = 16; /* LineCompare bit #9 */ @@ -830,7 +792,7 @@ static int cirrusfb_set_par_foo(struct fb_info *info) tmp |= 64; if (vsyncstart & 512) tmp |= 128; - DPRINTK("CRT7: %d\n", tmp); + dev_dbg(info->device, "CRT7: %d\n", tmp); vga_wcrt(regbase, VGA_CRTC_OVERFLOW, tmp); tmp = 0x40; /* LineCompare bit #8 */ @@ -838,25 +800,25 @@ static int cirrusfb_set_par_foo(struct fb_info *info) tmp |= 0x20; if (var->vmode & FB_VMODE_DOUBLE) tmp |= 0x80; - DPRINTK("CRT9: %d\n", tmp); + dev_dbg(info->device, "CRT9: %d\n", tmp); vga_wcrt(regbase, VGA_CRTC_MAX_SCAN, tmp); - DPRINTK("CRT10: %d\n", vsyncstart & 0xff); + dev_dbg(info->device, "CRT10: %d\n", vsyncstart & 0xff); vga_wcrt(regbase, VGA_CRTC_V_SYNC_START, vsyncstart & 0xff); - DPRINTK("CRT11: 64+32+%d\n", vsyncend % 16); + dev_dbg(info->device, "CRT11: 64+32+%d\n", vsyncend % 16); vga_wcrt(regbase, VGA_CRTC_V_SYNC_END, vsyncend % 16 + 64 + 32); - DPRINTK("CRT12: %d\n", vdispend & 0xff); + dev_dbg(info->device, "CRT12: %d\n", vdispend & 0xff); vga_wcrt(regbase, VGA_CRTC_V_DISP_END, vdispend & 0xff); - DPRINTK("CRT15: %d\n", (vdispend + 1) & 0xff); + dev_dbg(info->device, "CRT15: %d\n", (vdispend + 1) & 0xff); vga_wcrt(regbase, VGA_CRTC_V_BLANK_START, (vdispend + 1) & 0xff); - DPRINTK("CRT16: %d\n", vtotal & 0xff); + dev_dbg(info->device, "CRT16: %d\n", vtotal & 0xff); vga_wcrt(regbase, VGA_CRTC_V_BLANK_END, vtotal & 0xff); - DPRINTK("CRT18: 0xff\n"); + dev_dbg(info->device, "CRT18: 0xff\n"); vga_wcrt(regbase, VGA_CRTC_LINE_COMPARE, 0xff); tmp = 0; @@ -871,41 +833,75 @@ static int cirrusfb_set_par_foo(struct fb_info *info) if (vtotal & 512) tmp |= 128; - DPRINTK("CRT1a: %d\n", tmp); + dev_dbg(info->device, "CRT1a: %d\n", tmp); vga_wcrt(regbase, CL_CRT1A, tmp); freq = PICOS2KHZ(var->pixclock); + if (var->bits_per_pixel == 24) + if (cinfo->btype == BT_ALPINE || cinfo->btype == BT_SD64) + freq *= 3; + if (cinfo->multiplexing) + freq /= 2; + if (cinfo->doubleVCLK) + freq *= 2; + bestclock(freq, &nom, &den, &div); + dev_dbg(info->device, "VCLK freq: %ld kHz nom: %d den: %d div: %d\n", + freq, nom, den, div); + /* set VCLK0 */ /* hardware RefClock: 14.31818 MHz */ /* formula: VClk = (OSC * N) / (D * (1+P)) */ /* Example: VClk = (14.31818 * 91) / (23 * (1+1)) = 28.325 MHz */ - if (cinfo->btype == BT_ALPINE) { + if (cinfo->btype == BT_ALPINE || cinfo->btype == BT_PICASSO4 || + cinfo->btype == BT_SD64) { /* if freq is close to mclk or mclk/2 select mclk * as clock source */ - int divMCLK = cirrusfb_check_mclk(cinfo, freq); - if (divMCLK) { + int divMCLK = cirrusfb_check_mclk(info, freq); + if (divMCLK) nom = 0; - cirrusfb_set_mclk_as_source(cinfo, divMCLK); + cirrusfb_set_mclk_as_source(info, divMCLK); + } + if (is_laguna(cinfo)) { + long pcifc = fb_readl(cinfo->laguna_mmio + 0x3fc); + unsigned char tile = fb_readb(cinfo->laguna_mmio + 0x407); + unsigned short tile_control; + + if (cinfo->btype == BT_LAGUNAB) { + tile_control = fb_readw(cinfo->laguna_mmio + 0x2c4); + tile_control &= ~0x80; + fb_writew(tile_control, cinfo->laguna_mmio + 0x2c4); } + + fb_writel(pcifc | 0x10000000l, cinfo->laguna_mmio + 0x3fc); + fb_writeb(tile & 0x3f, cinfo->laguna_mmio + 0x407); + control = fb_readw(cinfo->laguna_mmio + 0x402); + threshold = fb_readw(cinfo->laguna_mmio + 0xea); + control &= ~0x6800; + format = 0; + threshold &= 0xffc0 & 0x3fbf; } if (nom) { - vga_wseq(regbase, CL_SEQRB, nom); tmp = den << 1; if (div != 0) tmp |= 1; - /* 6 bit denom; ONLY 5434!!! (bugged me 10 days) */ if ((cinfo->btype == BT_SD64) || (cinfo->btype == BT_ALPINE) || (cinfo->btype == BT_GD5480)) tmp |= 0x80; - DPRINTK("CL_SEQR1B: %ld\n", (long) tmp); - vga_wseq(regbase, CL_SEQR1B, tmp); + /* Laguna chipset has reversed clock registers */ + if (is_laguna(cinfo)) { + vga_wseq(regbase, CL_SEQRE, tmp); + vga_wseq(regbase, CL_SEQR1E, nom); + } else { + vga_wseq(regbase, CL_SEQRE, nom); + vga_wseq(regbase, CL_SEQR1E, tmp); + } } if (yres >= 1024) @@ -916,9 +912,6 @@ static int cirrusfb_set_par_foo(struct fb_info *info) * address wrap, no compat. */ vga_wcrt(regbase, VGA_CRTC_MODE, 0xc3); -/* HAEH? vga_wcrt(regbase, VGA_CRTC_V_SYNC_END, 0x20); - * previously: 0x00 unlock VGA_CRTC_H_TOTAL..CRT7 */ - /* don't know if it would hurt to also program this if no interlaced */ /* mode is used, but I feel better this way.. :-) */ if (var->vmode & FB_VMODE_INTERLACED) @@ -926,19 +919,15 @@ static int cirrusfb_set_par_foo(struct fb_info *info) else vga_wcrt(regbase, VGA_CRTC_REGS, 0x00); /* interlace control */ - vga_wseq(regbase, VGA_SEQ_CHARACTER_MAP, 0); - - /* adjust horizontal/vertical sync type (low/high) */ + /* adjust horizontal/vertical sync type (low/high), use VCLK3 */ /* enable display memory & CRTC I/O address for color mode */ - tmp = 0x03; + tmp = 0x03 | 0xc; if (var->sync & FB_SYNC_HOR_HIGH_ACT) tmp |= 0x40; if (var->sync & FB_SYNC_VERT_HIGH_ACT) tmp |= 0x80; WGen(cinfo, VGA_MIS_W, tmp); - /* Screen A Preset Row-Scan register */ - vga_wcrt(regbase, VGA_CRTC_PRESET_ROW, 0); /* text cursor on and start line */ vga_wcrt(regbase, VGA_CRTC_CURSOR_START, 0); /* text cursor end line */ @@ -952,7 +941,7 @@ static int cirrusfb_set_par_foo(struct fb_info *info) /* programming for different color depths */ if (var->bits_per_pixel == 1) { - DPRINTK("cirrusfb: preparing for 1 bit deep display\n"); + dev_dbg(info->device, "preparing for 1 bit deep display\n"); vga_wgfx(regbase, VGA_GFX_MODE, 0); /* mode register */ /* SR07 */ @@ -964,68 +953,53 @@ static int cirrusfb_set_par_foo(struct fb_info *info) case BT_PICASSO4: case BT_ALPINE: case BT_GD5480: - DPRINTK(" (for GD54xx)\n"); vga_wseq(regbase, CL_SEQR7, - regs.multiplexing ? + cinfo->multiplexing ? bi->sr07_1bpp_mux : bi->sr07_1bpp); break; case BT_LAGUNA: - DPRINTK(" (for GD546x)\n"); + case BT_LAGUNAB: vga_wseq(regbase, CL_SEQR7, vga_rseq(regbase, CL_SEQR7) & ~0x01); break; default: - printk(KERN_WARNING "cirrusfb: unknown Board\n"); + dev_warn(info->device, "unknown Board\n"); break; } /* Extended Sequencer Mode */ switch (cinfo->btype) { - case BT_SD64: - /* setting the SEQRF on SD64 is not necessary - * (only during init) - */ - DPRINTK("(for SD64)\n"); - /* MCLK select */ - vga_wseq(regbase, CL_SEQR1F, 0x1a); - break; case BT_PICCOLO: case BT_SPECTRUM: - DPRINTK("(for Piccolo/Spectrum)\n"); - /* ### ueberall 0x22? */ - /* ##vorher 1c MCLK select */ - vga_wseq(regbase, CL_SEQR1F, 0x22); /* evtl d0 bei 1 bit? avoid FIFO underruns..? */ vga_wseq(regbase, CL_SEQRF, 0xb0); break; case BT_PICASSO: - DPRINTK("(for Picasso)\n"); - /* ##vorher 22 MCLK select */ - vga_wseq(regbase, CL_SEQR1F, 0x22); /* ## vorher d0 avoid FIFO underruns..? */ vga_wseq(regbase, CL_SEQRF, 0xd0); break; + case BT_SD64: case BT_PICASSO4: case BT_ALPINE: case BT_GD5480: case BT_LAGUNA: - DPRINTK(" (for GD54xx)\n"); + case BT_LAGUNAB: /* do nothing */ break; default: - printk(KERN_WARNING "cirrusfb: unknown Board\n"); + dev_warn(info->device, "unknown Board\n"); break; } /* pixel mask: pass-through for first plane */ WGen(cinfo, VGA_PEL_MSK, 0x01); - if (regs.multiplexing) + if (cinfo->multiplexing) /* hidden dac reg: 1280x1024 */ WHDR(cinfo, 0x4a); else @@ -1035,7 +1009,6 @@ static int cirrusfb_set_par_foo(struct fb_info *info) vga_wseq(regbase, VGA_SEQ_MEMORY_MODE, 0x06); /* plane mask: only write to first plane */ vga_wseq(regbase, VGA_SEQ_PLANE_WRITE, 0x01); - offset = var->xres_virtual / 16; } /****************************************************** @@ -1045,7 +1018,7 @@ static int cirrusfb_set_par_foo(struct fb_info *info) */ else if (var->bits_per_pixel == 8) { - DPRINTK("cirrusfb: preparing for 8 bit deep display\n"); + dev_dbg(info->device, "preparing for 8 bit deep display\n"); switch (cinfo->btype) { case BT_SD64: case BT_PICCOLO: @@ -1054,34 +1027,27 @@ static int cirrusfb_set_par_foo(struct fb_info *info) case BT_PICASSO4: case BT_ALPINE: case BT_GD5480: - DPRINTK(" (for GD54xx)\n"); vga_wseq(regbase, CL_SEQR7, - regs.multiplexing ? + cinfo->multiplexing ? bi->sr07_8bpp_mux : bi->sr07_8bpp); break; case BT_LAGUNA: - DPRINTK(" (for GD546x)\n"); + case BT_LAGUNAB: vga_wseq(regbase, CL_SEQR7, vga_rseq(regbase, CL_SEQR7) | 0x01); + threshold |= 0x10; break; default: - printk(KERN_WARNING "cirrusfb: unknown Board\n"); + dev_warn(info->device, "unknown Board\n"); break; } switch (cinfo->btype) { - case BT_SD64: - /* MCLK select */ - vga_wseq(regbase, CL_SEQR1F, 0x1d); - break; - case BT_PICCOLO: case BT_PICASSO: case BT_SPECTRUM: - /* ### vorher 1c MCLK select */ - vga_wseq(regbase, CL_SEQR1F, 0x22); /* Fast Page-Mode writes */ vga_wseq(regbase, CL_SEQRF, 0xb0); break; @@ -1091,40 +1057,27 @@ static int cirrusfb_set_par_foo(struct fb_info *info) /* ### INCOMPLETE!! */ vga_wseq(regbase, CL_SEQRF, 0xb8); #endif -/* vga_wseq(regbase, CL_SEQR1F, 0x1c); */ - break; - case BT_ALPINE: - DPRINTK(" (for GD543x)\n"); - /* We already set SRF and SR1F */ - break; - + case BT_SD64: case BT_GD5480: case BT_LAGUNA: - DPRINTK(" (for GD54xx)\n"); + case BT_LAGUNAB: /* do nothing */ break; default: - printk(KERN_WARNING "cirrusfb: unknown Board\n"); + dev_warn(info->device, "unknown board\n"); break; } /* mode register: 256 color mode */ vga_wgfx(regbase, VGA_GFX_MODE, 64); - /* pixel mask: pass-through all planes */ - WGen(cinfo, VGA_PEL_MSK, 0xff); - if (regs.multiplexing) + if (cinfo->multiplexing) /* hidden dac reg: 1280x1024 */ WHDR(cinfo, 0x4a); else /* hidden dac: nothing */ WHDR(cinfo, 0); - /* memory mode: chain4, ext. memory */ - vga_wseq(regbase, VGA_SEQ_MEMORY_MODE, 0x0a); - /* plane mask: enable writing to all 4 planes */ - vga_wseq(regbase, VGA_SEQ_PLANE_WRITE, 0xff); - offset = var->xres_virtual / 8; } /****************************************************** @@ -1134,147 +1087,110 @@ static int cirrusfb_set_par_foo(struct fb_info *info) */ else if (var->bits_per_pixel == 16) { - DPRINTK("cirrusfb: preparing for 16 bit deep display\n"); + dev_dbg(info->device, "preparing for 16 bit deep display\n"); switch (cinfo->btype) { - case BT_SD64: - /* Extended Sequencer Mode: 256c col. mode */ - vga_wseq(regbase, CL_SEQR7, 0xf7); - /* MCLK select */ - vga_wseq(regbase, CL_SEQR1F, 0x1e); - break; - case BT_PICCOLO: case BT_SPECTRUM: vga_wseq(regbase, CL_SEQR7, 0x87); /* Fast Page-Mode writes */ vga_wseq(regbase, CL_SEQRF, 0xb0); - /* MCLK select */ - vga_wseq(regbase, CL_SEQR1F, 0x22); break; case BT_PICASSO: vga_wseq(regbase, CL_SEQR7, 0x27); /* Fast Page-Mode writes */ vga_wseq(regbase, CL_SEQRF, 0xb0); - /* MCLK select */ - vga_wseq(regbase, CL_SEQR1F, 0x22); break; + case BT_SD64: case BT_PICASSO4: - vga_wseq(regbase, CL_SEQR7, 0x27); -/* vga_wseq(regbase, CL_SEQR1F, 0x1c); */ - break; - case BT_ALPINE: - DPRINTK(" (for GD543x)\n"); - vga_wseq(regbase, CL_SEQR7, 0xa7); + /* Extended Sequencer Mode: 256c col. mode */ + vga_wseq(regbase, CL_SEQR7, + cinfo->doubleVCLK ? 0xa3 : 0xa7); break; case BT_GD5480: - DPRINTK(" (for GD5480)\n"); vga_wseq(regbase, CL_SEQR7, 0x17); /* We already set SRF and SR1F */ break; case BT_LAGUNA: - DPRINTK(" (for GD546x)\n"); + case BT_LAGUNAB: vga_wseq(regbase, CL_SEQR7, vga_rseq(regbase, CL_SEQR7) & ~0x01); + control |= 0x2000; + format |= 0x1400; + threshold |= 0x10; break; default: - printk(KERN_WARNING "CIRRUSFB: unknown Board\n"); + dev_warn(info->device, "unknown Board\n"); break; } /* mode register: 256 color mode */ vga_wgfx(regbase, VGA_GFX_MODE, 64); - /* pixel mask: pass-through all planes */ - WGen(cinfo, VGA_PEL_MSK, 0xff); #ifdef CONFIG_PCI - WHDR(cinfo, 0xc0); /* Copy Xbh */ + WHDR(cinfo, cinfo->doubleVCLK ? 0xe1 : 0xc1); #elif defined(CONFIG_ZORRO) /* FIXME: CONFIG_PCI and CONFIG_ZORRO may be defined both */ WHDR(cinfo, 0xa0); /* hidden dac reg: nothing special */ #endif - /* memory mode: chain4, ext. memory */ - vga_wseq(regbase, VGA_SEQ_MEMORY_MODE, 0x0a); - /* plane mask: enable writing to all 4 planes */ - vga_wseq(regbase, VGA_SEQ_PLANE_WRITE, 0xff); - offset = var->xres_virtual / 4; } /****************************************************** * - * 32 bpp + * 24 bpp * */ - else if (var->bits_per_pixel == 32) { - DPRINTK("cirrusfb: preparing for 32 bit deep display\n"); + else if (var->bits_per_pixel == 24) { + dev_dbg(info->device, "preparing for 24 bit deep display\n"); switch (cinfo->btype) { - case BT_SD64: - /* Extended Sequencer Mode: 256c col. mode */ - vga_wseq(regbase, CL_SEQR7, 0xf9); - /* MCLK select */ - vga_wseq(regbase, CL_SEQR1F, 0x1e); - break; - case BT_PICCOLO: case BT_SPECTRUM: vga_wseq(regbase, CL_SEQR7, 0x85); /* Fast Page-Mode writes */ vga_wseq(regbase, CL_SEQRF, 0xb0); - /* MCLK select */ - vga_wseq(regbase, CL_SEQR1F, 0x22); break; case BT_PICASSO: vga_wseq(regbase, CL_SEQR7, 0x25); /* Fast Page-Mode writes */ vga_wseq(regbase, CL_SEQRF, 0xb0); - /* MCLK select */ - vga_wseq(regbase, CL_SEQR1F, 0x22); break; + case BT_SD64: case BT_PICASSO4: - vga_wseq(regbase, CL_SEQR7, 0x25); -/* vga_wseq(regbase, CL_SEQR1F, 0x1c); */ - break; - case BT_ALPINE: - DPRINTK(" (for GD543x)\n"); - vga_wseq(regbase, CL_SEQR7, 0xa9); + /* Extended Sequencer Mode: 256c col. mode */ + vga_wseq(regbase, CL_SEQR7, 0xa5); break; case BT_GD5480: - DPRINTK(" (for GD5480)\n"); - vga_wseq(regbase, CL_SEQR7, 0x19); + vga_wseq(regbase, CL_SEQR7, 0x15); /* We already set SRF and SR1F */ break; case BT_LAGUNA: - DPRINTK(" (for GD546x)\n"); + case BT_LAGUNAB: vga_wseq(regbase, CL_SEQR7, vga_rseq(regbase, CL_SEQR7) & ~0x01); + control |= 0x4000; + format |= 0x2400; + threshold |= 0x20; break; default: - printk(KERN_WARNING "cirrusfb: unknown Board\n"); + dev_warn(info->device, "unknown Board\n"); break; } /* mode register: 256 color mode */ vga_wgfx(regbase, VGA_GFX_MODE, 64); - /* pixel mask: pass-through all planes */ - WGen(cinfo, VGA_PEL_MSK, 0xff); /* hidden dac reg: 8-8-8 mode (24 or 32) */ WHDR(cinfo, 0xc5); - /* memory mode: chain4, ext. memory */ - vga_wseq(regbase, VGA_SEQ_MEMORY_MODE, 0x0a); - /* plane mask: enable writing to all 4 planes */ - vga_wseq(regbase, VGA_SEQ_PLANE_WRITE, 0xff); - offset = var->xres_virtual / 4; } /****************************************************** @@ -1284,67 +1200,55 @@ static int cirrusfb_set_par_foo(struct fb_info *info) */ else - printk(KERN_ERR "cirrusfb: What's this?? " - " requested color depth == %d.\n", + dev_err(info->device, + "What's this? requested color depth == %d.\n", var->bits_per_pixel); - vga_wcrt(regbase, VGA_CRTC_OFFSET, offset & 0xff); + pitch = info->fix.line_length >> 3; + vga_wcrt(regbase, VGA_CRTC_OFFSET, pitch & 0xff); tmp = 0x22; - if (offset & 0x100) + if (pitch & 0x100) tmp |= 0x10; /* offset overflow bit */ /* screen start addr #16-18, fastpagemode cycles */ vga_wcrt(regbase, CL_CRT1B, tmp); - if (cinfo->btype == BT_SD64 || - cinfo->btype == BT_PICASSO4 || - cinfo->btype == BT_ALPINE || - cinfo->btype == BT_GD5480) - /* screen start address bit 19 */ - vga_wcrt(regbase, CL_CRT1D, 0x00); - - /* text cursor location high */ - vga_wcrt(regbase, VGA_CRTC_CURSOR_HI, 0); - /* text cursor location low */ - vga_wcrt(regbase, VGA_CRTC_CURSOR_LO, 0); - /* underline row scanline = at very bottom */ - vga_wcrt(regbase, VGA_CRTC_UNDERLINE, 0); - - /* controller mode */ - vga_wattr(regbase, VGA_ATC_MODE, 1); - /* overscan (border) color */ - vga_wattr(regbase, VGA_ATC_OVERSCAN, 0); - /* color plane enable */ - vga_wattr(regbase, VGA_ATC_PLANE_ENABLE, 15); + /* screen start address bit 19 */ + if (cirrusfb_board_info[cinfo->btype].scrn_start_bit19) + vga_wcrt(regbase, CL_CRT1D, (pitch >> 9) & 1); + + if (is_laguna(cinfo)) { + tmp = 0; + if ((htotal + 5) & 256) + tmp |= 128; + if (hdispend & 256) + tmp |= 64; + if (hsyncstart & 256) + tmp |= 48; + if (vtotal & 1024) + tmp |= 8; + if (vdispend & 1024) + tmp |= 4; + if (vsyncstart & 1024) + tmp |= 3; + + vga_wcrt(regbase, CL_CRT1E, tmp); + dev_dbg(info->device, "CRT1e: %d\n", tmp); + } + /* pixel panning */ vga_wattr(regbase, CL_AR33, 0); - /* color select */ - vga_wattr(regbase, VGA_ATC_COLOR_PAGE, 0); /* [ EGS: SetOffset(); ] */ /* From SetOffset(): Turn on VideoEnable bit in Attribute controller */ AttrOn(cinfo); - /* set/reset register */ - vga_wgfx(regbase, VGA_GFX_SR_VALUE, 0); - /* set/reset enable */ - vga_wgfx(regbase, VGA_GFX_SR_ENABLE, 0); - /* color compare */ - vga_wgfx(regbase, VGA_GFX_COMPARE_VALUE, 0); - /* data rotate */ - vga_wgfx(regbase, VGA_GFX_DATA_ROTATE, 0); - /* read map select */ - vga_wgfx(regbase, VGA_GFX_PLANE_READ, 0); - /* miscellaneous register */ - vga_wgfx(regbase, VGA_GFX_MISC, 1); - /* color don't care */ - vga_wgfx(regbase, VGA_GFX_COMPARE_MASK, 15); - /* bit mask */ - vga_wgfx(regbase, VGA_GFX_BIT_MASK, 255); - - /* graphics cursor attributes: nothing special */ - vga_wseq(regbase, CL_SEQR12, 0x0); - + if (is_laguna(cinfo)) { + /* no tiles */ + fb_writew(control | 0x1000, cinfo->laguna_mmio + 0x402); + fb_writew(format, cinfo->laguna_mmio + 0xc0); + fb_writew(threshold, cinfo->laguna_mmio + 0xea); + } /* finally, turn on everything - turn off "FullBandwidth" bit */ /* also, set "DotClock%2" bit where requested */ tmp = 0x01; @@ -1355,18 +1259,12 @@ static int cirrusfb_set_par_foo(struct fb_info *info) */ vga_wseq(regbase, VGA_SEQ_CLOCK_MODE, tmp); - DPRINTK("CL_SEQR1: %d\n", tmp); - - cinfo->currentmode = regs; - - /* pan to requested offset */ - cirrusfb_pan_display(var, info); + dev_dbg(info->device, "CL_SEQR1: %d\n", tmp); #ifdef CIRRUSFB_DEBUG - cirrusfb_dump(); + cirrusfb_dbg_reg_dump(info, NULL); #endif - DPRINTK("EXIT\n"); return 0; } @@ -1418,27 +1316,19 @@ static int cirrusfb_setcolreg(unsigned regno, unsigned red, unsigned green, static int cirrusfb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info) { - int xoffset = 0; - int yoffset = 0; + int xoffset; unsigned long base; - unsigned char tmp = 0, tmp2 = 0, xpix; + unsigned char tmp, xpix; struct cirrusfb_info *cinfo = info->par; - DPRINTK("ENTER\n"); - DPRINTK("virtual offset: (%d,%d)\n", var->xoffset, var->yoffset); - /* no range checks for xoffset and yoffset, */ /* as fb_pan_display has already done this */ if (var->vmode & FB_VMODE_YWRAP) return -EINVAL; - info->var.xoffset = var->xoffset; - info->var.yoffset = var->yoffset; - xoffset = var->xoffset * info->var.bits_per_pixel / 8; - yoffset = var->yoffset; - base = yoffset * info->fix.line_length + xoffset; + base = var->yoffset * info->fix.line_length + xoffset; if (info->var.bits_per_pixel == 1) { /* base is already correct */ @@ -1448,14 +1338,15 @@ static int cirrusfb_pan_display(struct fb_var_screeninfo *var, xpix = (unsigned char) ((xoffset % 4) * 2); } - cirrusfb_WaitBLT(cinfo->regbase); /* make sure all the BLT's are done */ + if (!is_laguna(cinfo)) + cirrusfb_WaitBLT(cinfo->regbase); /* lower 8 + 8 bits of screen start address */ - vga_wcrt(cinfo->regbase, VGA_CRTC_START_LO, - (unsigned char) (base & 0xff)); - vga_wcrt(cinfo->regbase, VGA_CRTC_START_HI, - (unsigned char) (base >> 8)); + vga_wcrt(cinfo->regbase, VGA_CRTC_START_LO, base & 0xff); + vga_wcrt(cinfo->regbase, VGA_CRTC_START_HI, (base >> 8) & 0xff); + /* 0xf2 is %11110010, exclude tmp bits */ + tmp = vga_rcrt(cinfo->regbase, CL_CRT1B) & 0xf2; /* construct bits 16, 17 and 18 of screen start address */ if (base & 0x10000) tmp |= 0x01; @@ -1464,13 +1355,17 @@ static int cirrusfb_pan_display(struct fb_var_screeninfo *var, if (base & 0x40000) tmp |= 0x08; - /* 0xf2 is %11110010, exclude tmp bits */ - tmp2 = (vga_rcrt(cinfo->regbase, CL_CRT1B) & 0xf2) | tmp; - vga_wcrt(cinfo->regbase, CL_CRT1B, tmp2); + vga_wcrt(cinfo->regbase, CL_CRT1B, tmp); /* construct bit 19 of screen start address */ - if (cirrusfb_board_info[cinfo->btype].scrn_start_bit19) - vga_wcrt(cinfo->regbase, CL_CRT1D, (base >> 12) & 0x80); + if (cirrusfb_board_info[cinfo->btype].scrn_start_bit19) { + tmp = vga_rcrt(cinfo->regbase, CL_CRT1D); + if (is_laguna(cinfo)) + tmp = (tmp & ~0x18) | ((base >> 16) & 0x18); + else + tmp = (tmp & ~0x80) | ((base >> 12) & 0x80); + vga_wcrt(cinfo->regbase, CL_CRT1D, tmp); + } /* write pixel panning value to AR33; this does not quite work in 8bpp * @@ -1479,9 +1374,6 @@ static int cirrusfb_pan_display(struct fb_var_screeninfo *var, if (info->var.bits_per_pixel == 1) vga_wattr(cinfo->regbase, CL_AR33, xpix); - cirrusfb_WaitBLT(cinfo->regbase); - - DPRINTK("EXIT\n"); return 0; } @@ -1502,57 +1394,54 @@ static int cirrusfb_blank(int blank_mode, struct fb_info *info) struct cirrusfb_info *cinfo = info->par; int current_mode = cinfo->blank_mode; - DPRINTK("ENTER, blank mode = %d\n", blank_mode); + dev_dbg(info->device, "ENTER, blank mode = %d\n", blank_mode); if (info->state != FBINFO_STATE_RUNNING || current_mode == blank_mode) { - DPRINTK("EXIT, returning 0\n"); + dev_dbg(info->device, "EXIT, returning 0\n"); return 0; } /* Undo current */ if (current_mode == FB_BLANK_NORMAL || - current_mode == FB_BLANK_UNBLANK) { - /* unblank the screen */ - val = vga_rseq(cinfo->regbase, VGA_SEQ_CLOCK_MODE); + current_mode == FB_BLANK_UNBLANK) /* clear "FullBandwidth" bit */ - vga_wseq(cinfo->regbase, VGA_SEQ_CLOCK_MODE, val & 0xdf); - /* and undo VESA suspend trickery */ - vga_wgfx(cinfo->regbase, CL_GRE, 0x00); - } - - /* set new */ - if (blank_mode > FB_BLANK_NORMAL) { - /* blank the screen */ - val = vga_rseq(cinfo->regbase, VGA_SEQ_CLOCK_MODE); + val = 0; + else /* set "FullBandwidth" bit */ - vga_wseq(cinfo->regbase, VGA_SEQ_CLOCK_MODE, val | 0x20); - } + val = 0x20; + + val |= vga_rseq(cinfo->regbase, VGA_SEQ_CLOCK_MODE) & 0xdf; + vga_wseq(cinfo->regbase, VGA_SEQ_CLOCK_MODE, val); switch (blank_mode) { case FB_BLANK_UNBLANK: case FB_BLANK_NORMAL: + val = 0x00; break; case FB_BLANK_VSYNC_SUSPEND: - vga_wgfx(cinfo->regbase, CL_GRE, 0x04); + val = 0x04; break; case FB_BLANK_HSYNC_SUSPEND: - vga_wgfx(cinfo->regbase, CL_GRE, 0x02); + val = 0x02; break; case FB_BLANK_POWERDOWN: - vga_wgfx(cinfo->regbase, CL_GRE, 0x06); + val = 0x06; break; default: - DPRINTK("EXIT, returning 1\n"); + dev_dbg(info->device, "EXIT, returning 1\n"); return 1; } + vga_wgfx(cinfo->regbase, CL_GRE, val); + cinfo->blank_mode = blank_mode; - DPRINTK("EXIT, returning 0\n"); + dev_dbg(info->device, "EXIT, returning 0\n"); /* Let fbcon do a soft blank for us */ return (blank_mode == FB_BLANK_NORMAL) ? 1 : 0; } + /**** END Hardware specific Routines **************************************/ /****************************************************************************/ /**** BEGIN Internal Routines ***********************************************/ @@ -1562,8 +1451,6 @@ static void init_vgachip(struct fb_info *info) struct cirrusfb_info *cinfo = info->par; const struct cirrusfb_board_info_rec *bi; - DPRINTK("ENTER\n"); - assert(cinfo != NULL); bi = &cirrusfb_board_info[cinfo->btype]; @@ -1591,25 +1478,23 @@ static void init_vgachip(struct fb_info *info) /* disable flickerfixer */ vga_wcrt(cinfo->regbase, CL_CRT51, 0x00); mdelay(100); - /* from Klaus' NetBSD driver: */ - vga_wgfx(cinfo->regbase, CL_GR2F, 0x00); - /* put blitter into 542x compat */ - vga_wgfx(cinfo->regbase, CL_GR33, 0x00); /* mode */ vga_wgfx(cinfo->regbase, CL_GR31, 0x00); - break; - - case BT_GD5480: + case BT_GD5480: /* fall through */ /* from Klaus' NetBSD driver: */ vga_wgfx(cinfo->regbase, CL_GR2F, 0x00); + case BT_ALPINE: /* fall through */ + /* put blitter into 542x compat */ + vga_wgfx(cinfo->regbase, CL_GR33, 0x00); break; - case BT_ALPINE: + case BT_LAGUNA: + case BT_LAGUNAB: /* Nothing to do to reset the board. */ break; default: - printk(KERN_ERR "cirrusfb: Warning: Unknown board type\n"); + dev_err(info->device, "Warning: Unknown board type\n"); break; } @@ -1629,31 +1514,28 @@ static void init_vgachip(struct fb_info *info) WGen(cinfo, CL_VSSM2, 0x01); /* reset sequencer logic */ - vga_wseq(cinfo->regbase, CL_SEQR0, 0x03); + vga_wseq(cinfo->regbase, VGA_SEQ_RESET, 0x03); /* FullBandwidth (video off) and 8/9 dot clock */ vga_wseq(cinfo->regbase, VGA_SEQ_CLOCK_MODE, 0x21); - /* polarity (-/-), disable access to display memory, - * VGA_CRTC_START_HI base address: color - */ - WGen(cinfo, VGA_MIS_W, 0xc1); /* "magic cookie" - doesn't make any sense to me.. */ /* vga_wgfx(cinfo->regbase, CL_GRA, 0xce); */ /* unlock all extension registers */ vga_wseq(cinfo->regbase, CL_SEQR6, 0x12); - /* reset blitter */ - vga_wgfx(cinfo->regbase, CL_GR31, 0x04); - switch (cinfo->btype) { case BT_GD5480: vga_wseq(cinfo->regbase, CL_SEQRF, 0x98); break; case BT_ALPINE: + case BT_LAGUNA: + case BT_LAGUNAB: break; case BT_SD64: +#ifdef CONFIG_ZORRO vga_wseq(cinfo->regbase, CL_SEQRF, 0xb8); +#endif break; default: vga_wseq(cinfo->regbase, CL_SEQR16, 0x0f); @@ -1665,8 +1547,8 @@ static void init_vgachip(struct fb_info *info) vga_wseq(cinfo->regbase, VGA_SEQ_PLANE_WRITE, 0xff); /* character map select: doesn't even matter in gx mode */ vga_wseq(cinfo->regbase, VGA_SEQ_CHARACTER_MAP, 0x00); - /* memory mode: chain-4, no odd/even, ext. memory */ - vga_wseq(cinfo->regbase, VGA_SEQ_MEMORY_MODE, 0x0e); + /* memory mode: chain4, ext. memory */ + vga_wseq(cinfo->regbase, VGA_SEQ_MEMORY_MODE, 0x0a); /* controller-internal base address of video memory */ if (bi->init_sr07) @@ -1692,20 +1574,12 @@ static void init_vgachip(struct fb_info *info) vga_wseq(cinfo->regbase, CL_SEQR18, 0x02); } - /* MCLK select etc. */ - if (bi->init_sr1f) - vga_wseq(cinfo->regbase, CL_SEQR1F, bi->sr1f); - /* Screen A preset row scan: none */ vga_wcrt(cinfo->regbase, VGA_CRTC_PRESET_ROW, 0x00); /* Text cursor start: disable text cursor */ vga_wcrt(cinfo->regbase, VGA_CRTC_CURSOR_START, 0x20); /* Text cursor end: - */ vga_wcrt(cinfo->regbase, VGA_CRTC_CURSOR_END, 0x00); - /* Screen start address high: 0 */ - vga_wcrt(cinfo->regbase, VGA_CRTC_START_HI, 0x00); - /* Screen start address low: 0 */ - vga_wcrt(cinfo->regbase, VGA_CRTC_START_LO, 0x00); /* text cursor location high: 0 */ vga_wcrt(cinfo->regbase, VGA_CRTC_CURSOR_HI, 0x00); /* text cursor location low: 0 */ @@ -1713,10 +1587,6 @@ static void init_vgachip(struct fb_info *info) /* Underline Row scanline: - */ vga_wcrt(cinfo->regbase, VGA_CRTC_UNDERLINE, 0x00); - /* mode control: timing enable, byte mode, no compat modes */ - vga_wcrt(cinfo->regbase, VGA_CRTC_MODE, 0xc3); - /* Line Compare: not needed */ - vga_wcrt(cinfo->regbase, VGA_CRTC_LINE_COMPARE, 0x00); /* ### add 0x40 for text modes with > 30 MHz pixclock */ /* ext. display controls: ext.adr. wrap */ vga_wcrt(cinfo->regbase, CL_CRT1B, 0x02); @@ -1739,7 +1609,9 @@ static void init_vgachip(struct fb_info *info) vga_wgfx(cinfo->regbase, VGA_GFX_COMPARE_MASK, 0x0f); /* Bit Mask: no mask at all */ vga_wgfx(cinfo->regbase, VGA_GFX_BIT_MASK, 0xff); - if (cinfo->btype == BT_ALPINE) + + if (cinfo->btype == BT_ALPINE || cinfo->btype == BT_SD64 || + is_laguna(cinfo)) /* (5434 can't have bit 3 set for bitblt) */ vga_wgfx(cinfo->regbase, CL_GRB, 0x20); else @@ -1779,18 +1651,11 @@ static void init_vgachip(struct fb_info *info) vga_wattr(cinfo->regbase, VGA_ATC_OVERSCAN, 0x00); /* Color Plane enable: Enable all 4 planes */ vga_wattr(cinfo->regbase, VGA_ATC_PLANE_ENABLE, 0x0f); -/* ### vga_wattr(cinfo->regbase, CL_AR33, 0x00); * Pixel Panning: - */ /* Color Select: - */ vga_wattr(cinfo->regbase, VGA_ATC_COLOR_PAGE, 0x00); WGen(cinfo, VGA_PEL_MSK, 0xff); /* Pixel mask: no mask */ - if (cinfo->btype != BT_ALPINE && cinfo->btype != BT_GD5480) - /* polarity (-/-), enable display mem, - * VGA_CRTC_START_HI i/o base = color - */ - WGen(cinfo, VGA_MIS_W, 0xc3); - /* BLT Start/status: Blitter reset */ vga_wgfx(cinfo->regbase, CL_GR31, 0x04); /* - " - : "end-of-reset" */ @@ -1798,8 +1663,6 @@ static void init_vgachip(struct fb_info *info) /* misc... */ WHDR(cinfo, 0); /* Hidden DAC register: - */ - - DPRINTK("EXIT\n"); return; } @@ -1808,8 +1671,6 @@ static void switch_monitor(struct cirrusfb_info *cinfo, int on) #ifdef CONFIG_ZORRO /* only works on Zorro boards */ static int IsOn = 0; /* XXX not ok for multiple boards */ - DPRINTK("ENTER\n"); - if (cinfo->btype == BT_PICASSO4) return; /* nothing to switch */ if (cinfo->btype == BT_ALPINE) @@ -1819,8 +1680,6 @@ static void switch_monitor(struct cirrusfb_info *cinfo, int on) if (cinfo->btype == BT_PICASSO) { if ((on && !IsOn) || (!on && IsOn)) WSFR(cinfo, 0xff); - - DPRINTK("EXIT\n"); return; } if (on) { @@ -1847,11 +1706,10 @@ static void switch_monitor(struct cirrusfb_info *cinfo, int on) case BT_SPECTRUM: WSFR(cinfo, 0x4f); break; - default: /* do nothing */ break; + default: /* do nothing */ + break; } } - - DPRINTK("EXIT\n"); #endif /* CONFIG_ZORRO */ } @@ -1859,6 +1717,17 @@ static void switch_monitor(struct cirrusfb_info *cinfo, int on) /* Linux 2.6-style accelerated functions */ /******************************************/ +static int cirrusfb_sync(struct fb_info *info) +{ + struct cirrusfb_info *cinfo = info->par; + + if (!is_laguna(cinfo)) { + while (vga_rgfx(cinfo->regbase, CL_GR31) & 0x03) + cpu_relax(); + } + return 0; +} + static void cirrusfb_fillrect(struct fb_info *info, const struct fb_fillrect *region) { @@ -1894,8 +1763,8 @@ static void cirrusfb_fillrect(struct fb_info *info, info->var.bits_per_pixel, (region->dx * m) / 8, region->dy, (region->width * m) / 8, region->height, - color, - info->fix.line_length); + color, color, + info->fix.line_length, 0x40); } static void cirrusfb_copyarea(struct fb_info *info, @@ -1943,9 +1812,46 @@ static void cirrusfb_imageblit(struct fb_info *info, const struct fb_image *image) { struct cirrusfb_info *cinfo = info->par; + unsigned char op = (info->var.bits_per_pixel == 24) ? 0xc : 0x4; - cirrusfb_WaitBLT(cinfo->regbase); - cfb_imageblit(info, image); + if (info->state != FBINFO_STATE_RUNNING) + return; + /* Alpine/SD64 does not work at 24bpp ??? */ + if (info->flags & FBINFO_HWACCEL_DISABLED || image->depth != 1) + cfb_imageblit(info, image); + else if ((cinfo->btype == BT_ALPINE || cinfo->btype == BT_SD64) && + op == 0xc) + cfb_imageblit(info, image); + else { + unsigned size = ((image->width + 7) >> 3) * image->height; + int m = info->var.bits_per_pixel; + u32 fg, bg; + + if (info->var.bits_per_pixel == 8) { + fg = image->fg_color; + bg = image->bg_color; + } else { + fg = ((u32 *)(info->pseudo_palette))[image->fg_color]; + bg = ((u32 *)(info->pseudo_palette))[image->bg_color]; + } + if (info->var.bits_per_pixel == 24) { + /* clear background first */ + cirrusfb_RectFill(cinfo->regbase, + info->var.bits_per_pixel, + (image->dx * m) / 8, image->dy, + (image->width * m) / 8, + image->height, + bg, bg, + info->fix.line_length, 0x40); + } + cirrusfb_RectFill(cinfo->regbase, + info->var.bits_per_pixel, + (image->dx * m) / 8, image->dy, + (image->width * m) / 8, image->height, + fg, bg, + info->fix.line_length, op); + memcpy(info->screen_base, image->data, size); + } } #ifdef CONFIG_PPC_PREP @@ -1953,12 +1859,8 @@ static void cirrusfb_imageblit(struct fb_info *info, #define PREP_IO_BASE ((volatile unsigned char *) 0x80000000) static void get_prep_addrs(unsigned long *display, unsigned long *registers) { - DPRINTK("ENTER\n"); - *display = PREP_VIDEO_BASE; *registers = (unsigned long) PREP_IO_BASE; - - DPRINTK("EXIT\n"); } #endif /* CONFIG_PPC_PREP */ @@ -1970,40 +1872,43 @@ static int release_io_ports; * based on the DRAM bandwidth bit and DRAM bank switching bit. This * works with 1MB, 2MB and 4MB configurations (which the Motorola boards * seem to have. */ -static unsigned int __devinit cirrusfb_get_memsize(u8 __iomem *regbase) +static unsigned int __devinit cirrusfb_get_memsize(struct fb_info *info, + u8 __iomem *regbase) { unsigned long mem; - unsigned char SRF; + struct cirrusfb_info *cinfo = info->par; - DPRINTK("ENTER\n"); + if (is_laguna(cinfo)) { + unsigned char SR14 = vga_rseq(regbase, CL_SEQR14); - SRF = vga_rseq(regbase, CL_SEQRF); - switch ((SRF & 0x18)) { - case 0x08: - mem = 512 * 1024; - break; - case 0x10: - mem = 1024 * 1024; - break; - /* 64-bit DRAM data bus width; assume 2MB. Also indicates 2MB memory - * on the 5430. - */ - case 0x18: - mem = 2048 * 1024; - break; - default: - printk(KERN_WARNING "CLgenfb: Unknown memory size!\n"); - mem = 1024 * 1024; + mem = ((SR14 & 7) + 1) << 20; + } else { + unsigned char SRF = vga_rseq(regbase, CL_SEQRF); + switch ((SRF & 0x18)) { + case 0x08: + mem = 512 * 1024; + break; + case 0x10: + mem = 1024 * 1024; + break; + /* 64-bit DRAM data bus width; assume 2MB. + * Also indicates 2MB memory on the 5430. + */ + case 0x18: + mem = 2048 * 1024; + break; + default: + dev_warn(info->device, "Unknown memory size!\n"); + mem = 1024 * 1024; + } + /* If DRAM bank switching is enabled, there must be + * twice as much memory installed. (4MB on the 5434) + */ + if (cinfo->btype != BT_ALPINE && (SRF & 0x80) != 0) + mem *= 2; } - if (SRF & 0x80) - /* If DRAM bank switching is enabled, there must be twice as much - * memory installed. (4MB on the 5434) - */ - mem *= 2; /* TODO: Handling of GD5446/5480 (see XF86 sources ...) */ - - DPRINTK("EXIT\n"); return mem; } @@ -2014,8 +1919,6 @@ static void get_pci_addrs(const struct pci_dev *pdev, assert(display != NULL); assert(registers != NULL); - DPRINTK("ENTER\n"); - *display = 0; *registers = 0; @@ -2030,14 +1933,15 @@ static void get_pci_addrs(const struct pci_dev *pdev, } assert(*display != 0); - - DPRINTK("EXIT\n"); } static void cirrusfb_pci_unmap(struct fb_info *info) { struct pci_dev *pdev = to_pci_dev(info->device); + struct cirrusfb_info *cinfo = info->par; + if (cinfo->laguna_mmio == NULL) + iounmap(cinfo->laguna_mmio); iounmap(info->screen_base); #if 0 /* if system didn't claim this region, we would... */ release_mem_region(0xA0000, 65535); @@ -2067,6 +1971,22 @@ static void cirrusfb_zorro_unmap(struct fb_info *info) } #endif /* CONFIG_ZORRO */ +/* function table of the above functions */ +static struct fb_ops cirrusfb_ops = { + .owner = THIS_MODULE, + .fb_open = cirrusfb_open, + .fb_release = cirrusfb_release, + .fb_setcolreg = cirrusfb_setcolreg, + .fb_check_var = cirrusfb_check_var, + .fb_set_par = cirrusfb_set_par, + .fb_pan_display = cirrusfb_pan_display, + .fb_blank = cirrusfb_blank, + .fb_fillrect = cirrusfb_fillrect, + .fb_copyarea = cirrusfb_copyarea, + .fb_sync = cirrusfb_sync, + .fb_imageblit = cirrusfb_imageblit, +}; + static int __devinit cirrusfb_set_fbinfo(struct fb_info *info) { struct cirrusfb_info *cinfo = info->par; @@ -2077,10 +1997,16 @@ static int __devinit cirrusfb_set_fbinfo(struct fb_info *info) | FBINFO_HWACCEL_XPAN | FBINFO_HWACCEL_YPAN | FBINFO_HWACCEL_FILLRECT + | FBINFO_HWACCEL_IMAGEBLIT | FBINFO_HWACCEL_COPYAREA; - if (noaccel) + if (noaccel || is_laguna(cinfo)) { info->flags |= FBINFO_HWACCEL_DISABLED; + info->fix.accel = FB_ACCEL_NONE; + } else + info->fix.accel = FB_ACCEL_CIRRUS_ALPINE; + info->fbops = &cirrusfb_ops; + if (cinfo->btype == BT_GD5480) { if (var->bits_per_pixel == 16) info->screen_base += 1 * MB_; @@ -2104,7 +2030,6 @@ static int __devinit cirrusfb_set_fbinfo(struct fb_info *info) /* FIXME: map region at 0xB8000 if available, fill in here */ info->fix.mmio_len = 0; - info->fix.accel = FB_ACCEL_NONE; fb_alloc_cmap(&info->cmap, 256, 0); @@ -2115,70 +2040,56 @@ static int __devinit cirrusfb_register(struct fb_info *info) { struct cirrusfb_info *cinfo = info->par; int err; - enum cirrus_board btype; - - DPRINTK("ENTER\n"); - - printk(KERN_INFO "cirrusfb: Driver for Cirrus Logic based " - "graphic boards, v" CIRRUSFB_VERSION "\n"); - - btype = cinfo->btype; /* sanity checks */ - assert(btype != BT_NONE); + assert(cinfo->btype != BT_NONE); /* set all the vital stuff */ cirrusfb_set_fbinfo(info); - DPRINTK("cirrusfb: (RAM start set to: 0x%p)\n", info->screen_base); + dev_dbg(info->device, "(RAM start set to: 0x%p)\n", info->screen_base); err = fb_find_mode(&info->var, info, mode_option, NULL, 0, NULL, 8); if (!err) { - DPRINTK("wrong initial video mode\n"); + dev_dbg(info->device, "wrong initial video mode\n"); err = -EINVAL; goto err_dealloc_cmap; } info->var.activate = FB_ACTIVATE_NOW; - err = cirrusfb_decode_var(&info->var, &cinfo->currentmode, info); + err = cirrusfb_check_var(&info->var, info); if (err < 0) { /* should never happen */ - DPRINTK("choking on default var... umm, no good.\n"); + dev_dbg(info->device, + "choking on default var... umm, no good.\n"); goto err_dealloc_cmap; } err = register_framebuffer(info); if (err < 0) { - printk(KERN_ERR "cirrusfb: could not register " - "fb device; err = %d!\n", err); + dev_err(info->device, + "could not register fb device; err = %d!\n", err); goto err_dealloc_cmap; } - DPRINTK("EXIT, returning 0\n"); return 0; err_dealloc_cmap: fb_dealloc_cmap(&info->cmap); - cinfo->unmap(info); - framebuffer_release(info); return err; } static void __devexit cirrusfb_cleanup(struct fb_info *info) { struct cirrusfb_info *cinfo = info->par; - DPRINTK("ENTER\n"); switch_monitor(cinfo, 0); - unregister_framebuffer(info); fb_dealloc_cmap(&info->cmap); - printk("Framebuffer unregistered\n"); + dev_dbg(info->device, "Framebuffer unregistered\n"); cinfo->unmap(info); framebuffer_release(info); - - DPRINTK("EXIT\n"); } #ifdef CONFIG_PCI @@ -2187,7 +2098,6 @@ static int __devinit cirrusfb_pci_register(struct pci_dev *pdev, { struct cirrusfb_info *cinfo; struct fb_info *info; - enum cirrus_board btype; unsigned long board_addr, board_size; int ret; @@ -2201,15 +2111,17 @@ static int __devinit cirrusfb_pci_register(struct pci_dev *pdev, if (!info) { printk(KERN_ERR "cirrusfb: could not allocate memory\n"); ret = -ENOMEM; - goto err_disable; + goto err_out; } cinfo = info->par; - cinfo->btype = btype = (enum cirrus_board) ent->driver_data; + cinfo->btype = (enum cirrus_board) ent->driver_data; - DPRINTK(" Found PCI device, base address 0 is 0x%x, btype set to %d\n", - pdev->resource[0].start, btype); - DPRINTK(" base address 1 is 0x%x\n", pdev->resource[1].start); + dev_dbg(info->device, + " Found PCI device, base address 0 is 0x%Lx, btype set to %d\n", + (unsigned long long)pdev->resource[0].start, cinfo->btype); + dev_dbg(info->device, " base address 1 is 0x%Lx\n", + (unsigned long long)pdev->resource[1].start); if (isPReP) { pci_write_config_dword(pdev, PCI_BASE_ADDRESS_0, 0x00000000); @@ -2219,30 +2131,30 @@ static int __devinit cirrusfb_pci_register(struct pci_dev *pdev, /* PReP dies if we ioremap the IO registers, but it works w/out... */ cinfo->regbase = (char __iomem *) info->fix.mmio_start; } else { - DPRINTK("Attempt to get PCI info for Cirrus Graphics Card\n"); + dev_dbg(info->device, + "Attempt to get PCI info for Cirrus Graphics Card\n"); get_pci_addrs(pdev, &board_addr, &info->fix.mmio_start); /* FIXME: this forces VGA. alternatives? */ cinfo->regbase = NULL; + cinfo->laguna_mmio = ioremap(info->fix.mmio_start, 0x1000); } - DPRINTK("Board address: 0x%lx, register address: 0x%lx\n", + dev_dbg(info->device, "Board address: 0x%lx, register address: 0x%lx\n", board_addr, info->fix.mmio_start); - board_size = (btype == BT_GD5480) ? - 32 * MB_ : cirrusfb_get_memsize(cinfo->regbase); + board_size = (cinfo->btype == BT_GD5480) ? + 32 * MB_ : cirrusfb_get_memsize(info, cinfo->regbase); ret = pci_request_regions(pdev, "cirrusfb"); if (ret < 0) { - printk(KERN_ERR "cirrusfb: cannot reserve region 0x%lx, " - "abort\n", - board_addr); + dev_err(info->device, "cannot reserve region 0x%lx, abort\n", + board_addr); goto err_release_fb; } #if 0 /* if the system didn't claim this region, we would... */ if (!request_mem_region(0xA0000, 65535, "cirrusfb")) { - printk(KERN_ERR "cirrusfb: cannot reserve region 0x%lx, abort\n" -, - 0xA0000L); + dev_err(info->device, "cannot reserve region 0x%lx, abort\n", + 0xA0000L); ret = -EBUSY; goto err_release_regions; } @@ -2260,16 +2172,17 @@ static int __devinit cirrusfb_pci_register(struct pci_dev *pdev, info->screen_size = board_size; cinfo->unmap = cirrusfb_pci_unmap; - printk(KERN_INFO "RAM (%lu kB) at 0x%lx, Cirrus " - "Logic chipset on PCI bus\n", - info->screen_size >> 10, board_addr); + dev_info(info->device, + "Cirrus Logic chipset on PCI bus, RAM (%lu kB) at 0x%lx\n", + info->screen_size >> 10, board_addr); pci_set_drvdata(pdev, info); ret = cirrusfb_register(info); - if (ret) - iounmap(info->screen_base); - return ret; + if (!ret) + return 0; + pci_set_drvdata(pdev, NULL); + iounmap(info->screen_base); err_release_legacy: if (release_io_ports) release_region(0x3C0, 32); @@ -2279,8 +2192,9 @@ err_release_regions: #endif pci_release_regions(pdev); err_release_fb: + if (cinfo->laguna_mmio != NULL) + iounmap(cinfo->laguna_mmio); framebuffer_release(info); -err_disable: err_out: return ret; } @@ -2288,11 +2202,8 @@ err_out: static void __devexit cirrusfb_pci_unregister(struct pci_dev *pdev) { struct fb_info *info = pci_get_drvdata(pdev); - DPRINTK("ENTER\n"); cirrusfb_cleanup(info); - - DPRINTK("EXIT\n"); } static struct pci_driver cirrusfb_pci_driver = { @@ -2324,8 +2235,6 @@ static int __devinit cirrusfb_zorro_register(struct zorro_dev *z, if (cirrusfb_zorro_table2[btype].id2) z2 = zorro_find_device(cirrusfb_zorro_table2[btype].id2, NULL); size = cirrusfb_zorro_table2[btype].size; - printk(KERN_INFO "cirrusfb: %s board detected; ", - cirrusfb_board_info[btype].name); info = framebuffer_alloc(sizeof(struct cirrusfb_info), &z->dev); if (!info) { @@ -2334,6 +2243,9 @@ static int __devinit cirrusfb_zorro_register(struct zorro_dev *z, goto err_out; } + dev_info(info->device, "%s board detected\n", + cirrusfb_board_info[btype].name); + cinfo = info->par; cinfo->btype = btype; @@ -2345,19 +2257,16 @@ static int __devinit cirrusfb_zorro_register(struct zorro_dev *z, info->screen_size = size; if (!zorro_request_device(z, "cirrusfb")) { - printk(KERN_ERR "cirrusfb: cannot reserve region 0x%lx, " - "abort\n", - board_addr); + dev_err(info->device, "cannot reserve region 0x%lx, abort\n", + board_addr); ret = -EBUSY; goto err_release_fb; } - printk(" RAM (%lu MB) at $%lx, ", board_size / MB_, board_addr); - ret = -EIO; if (btype == BT_PICASSO4) { - printk(KERN_INFO " REG at $%lx\n", board_addr + 0x600000); + dev_info(info->device, " REG at $%lx\n", board_addr + 0x600000); /* To be precise, for the P4 this is not the */ /* begin of the board, but the begin of RAM. */ @@ -2367,7 +2276,7 @@ static int __devinit cirrusfb_zorro_register(struct zorro_dev *z, if (!cinfo->regbase) goto err_release_region; - DPRINTK("cirrusfb: Virtual address for board set to: $%p\n", + dev_dbg(info->device, "Virtual address for board set to: $%p\n", cinfo->regbase); cinfo->regbase += 0x600000; info->fix.mmio_start = board_addr + 0x600000; @@ -2377,8 +2286,8 @@ static int __devinit cirrusfb_zorro_register(struct zorro_dev *z, if (!info->screen_base) goto err_unmap_regbase; } else { - printk(KERN_INFO " REG at $%lx\n", - (unsigned long) z2->resource.start); + dev_info(info->device, " REG at $%lx\n", + (unsigned long) z2->resource.start); info->fix.smem_start = board_addr; if (board_addr > 0x01000000) @@ -2392,27 +2301,32 @@ static int __devinit cirrusfb_zorro_register(struct zorro_dev *z, cinfo->regbase = (caddr_t) ZTWO_VADDR(z2->resource.start); info->fix.mmio_start = z2->resource.start; - DPRINTK("cirrusfb: Virtual address for board set to: $%p\n", + dev_dbg(info->device, "Virtual address for board set to: $%p\n", cinfo->regbase); } cinfo->unmap = cirrusfb_zorro_unmap; - printk(KERN_INFO "Cirrus Logic chipset on Zorro bus\n"); + dev_info(info->device, + "Cirrus Logic chipset on Zorro bus, RAM (%lu MB) at $%lx\n", + board_size / MB_, board_addr); + zorro_set_drvdata(z, info); + /* MCLK select etc. */ + if (cirrusfb_board_info[btype].init_sr1f) + vga_wseq(cinfo->regbase, CL_SEQR1F, + cirrusfb_board_info[btype].sr1f); + ret = cirrusfb_register(info); - if (ret) { - if (btype == BT_PICASSO4) { - iounmap(info->screen_base); - iounmap(cinfo->regbase - 0x600000); - } else if (board_addr > 0x01000000) - iounmap(info->screen_base); - } - return ret; + if (!ret) + return 0; + + if (btype == BT_PICASSO4 || board_addr > 0x01000000) + iounmap(info->screen_base); err_unmap_regbase: - /* Parental advisory: explicit hack */ - iounmap(cinfo->regbase - 0x600000); + if (btype == BT_PICASSO4) + iounmap(cinfo->regbase - 0x600000); err_release_region: release_region(board_addr, board_size); err_release_fb: @@ -2424,11 +2338,8 @@ err_out: void __devexit cirrusfb_zorro_unregister(struct zorro_dev *z) { struct fb_info *info = zorro_get_drvdata(z); - DPRINTK("ENTER\n"); cirrusfb_cleanup(info); - - DPRINTK("EXIT\n"); } static struct zorro_driver cirrusfb_zorro_driver = { @@ -2439,33 +2350,11 @@ static struct zorro_driver cirrusfb_zorro_driver = { }; #endif /* CONFIG_ZORRO */ -static int __init cirrusfb_init(void) -{ - int error = 0; - #ifndef MODULE - char *option = NULL; - - if (fb_get_options("cirrusfb", &option)) - return -ENODEV; - cirrusfb_setup(option); -#endif - -#ifdef CONFIG_ZORRO - error |= zorro_register_driver(&cirrusfb_zorro_driver); -#endif -#ifdef CONFIG_PCI - error |= pci_register_driver(&cirrusfb_pci_driver); -#endif - return error; -} - -#ifndef MODULE -static int __init cirrusfb_setup(char *options) { +static int __init cirrusfb_setup(char *options) +{ char *this_opt; - DPRINTK("ENTER\n"); - if (!options || !*options) return 0; @@ -2473,8 +2362,6 @@ static int __init cirrusfb_setup(char *options) { if (!*this_opt) continue; - DPRINTK("cirrusfb_setup: option '%s'\n", this_opt); - if (!strcmp(this_opt, "noaccel")) noaccel = 1; else if (!strncmp(this_opt, "mode:", 5)) @@ -2494,6 +2381,27 @@ MODULE_AUTHOR("Copyright 1999,2000 Jeff Garzik <jgarzik@pobox.com>"); MODULE_DESCRIPTION("Accelerated FBDev driver for Cirrus Logic chips"); MODULE_LICENSE("GPL"); +static int __init cirrusfb_init(void) +{ + int error = 0; + +#ifndef MODULE + char *option = NULL; + + if (fb_get_options("cirrusfb", &option)) + return -ENODEV; + cirrusfb_setup(option); +#endif + +#ifdef CONFIG_ZORRO + error |= zorro_register_driver(&cirrusfb_zorro_driver); +#endif +#ifdef CONFIG_PCI + error |= pci_register_driver(&cirrusfb_pci_driver); +#endif + return error; +} + static void __exit cirrusfb_exit(void) { #ifdef CONFIG_PCI @@ -2560,8 +2468,6 @@ static void AttrOn(const struct cirrusfb_info *cinfo) { assert(cinfo != NULL); - DPRINTK("ENTER\n"); - if (vga_rcrt(cinfo->regbase, CL_CRT24) & 0x80) { /* if we're just in "write value" mode, write back the */ /* same value as before to not modify anything */ @@ -2574,8 +2480,6 @@ static void AttrOn(const struct cirrusfb_info *cinfo) /* dummy write on Reg0 to be on "write index" mode next time */ vga_w(cinfo->regbase, VGA_ATT_IW, 0x00); - - DPRINTK("EXIT\n"); } /*** WHDR() - write into the Hidden DAC register ***/ @@ -2588,6 +2492,8 @@ static void WHDR(const struct cirrusfb_info *cinfo, unsigned char val) { unsigned char dummy; + if (is_laguna(cinfo)) + return; if (cinfo->btype == BT_PICASSO) { /* Klaus' hint for correct access to HDR on some boards */ /* first write 0 to pixel mask (3c6) */ @@ -2655,7 +2561,8 @@ static void WClut(struct cirrusfb_info *cinfo, unsigned char regnum, unsigned ch vga_w(cinfo->regbase, VGA_PEL_IW, regnum); if (cinfo->btype == BT_PICASSO || cinfo->btype == BT_PICASSO4 || - cinfo->btype == BT_ALPINE || cinfo->btype == BT_GD5480) { + cinfo->btype == BT_ALPINE || cinfo->btype == BT_GD5480 || + cinfo->btype == BT_SD64 || is_laguna(cinfo)) { /* but DAC data register IS, at least for Picasso II */ if (cinfo->btype == BT_PICASSO) data += 0xfff; @@ -2702,9 +2609,8 @@ static void RClut(struct cirrusfb_info *cinfo, unsigned char regnum, unsigned ch /* FIXME: use interrupts instead */ static void cirrusfb_WaitBLT(u8 __iomem *regbase) { - /* now busy-wait until we're done */ while (vga_rgfx(regbase, CL_GR31) & 0x08) - /* do nothing */ ; + cpu_relax(); } /******************************************************************* @@ -2713,60 +2619,12 @@ static void cirrusfb_WaitBLT(u8 __iomem *regbase) perform accelerated "scrolling" ********************************************************************/ -static void cirrusfb_BitBLT(u8 __iomem *regbase, int bits_per_pixel, - u_short curx, u_short cury, - u_short destx, u_short desty, - u_short width, u_short height, - u_short line_length) -{ - u_short nwidth, nheight; - u_long nsrc, ndest; - u_char bltmode; - - DPRINTK("ENTER\n"); - - nwidth = width - 1; - nheight = height - 1; - - bltmode = 0x00; - /* if source adr < dest addr, do the Blt backwards */ - if (cury <= desty) { - if (cury == desty) { - /* if src and dest are on the same line, check x */ - if (curx < destx) - bltmode |= 0x01; - } else - bltmode |= 0x01; - } - if (!bltmode) { - /* standard case: forward blitting */ - nsrc = (cury * line_length) + curx; - ndest = (desty * line_length) + destx; - } else { - /* this means start addresses are at the end, - * counting backwards - */ - nsrc = cury * line_length + curx + - nheight * line_length + nwidth; - ndest = desty * line_length + destx + - nheight * line_length + nwidth; - } - - /* - run-down of registers to be programmed: - destination pitch - source pitch - BLT width/height - source start - destination start - BLT mode - BLT ROP - VGA_GFX_SR_VALUE / VGA_GFX_SR_ENABLE: "fill color" - start/stop - */ - - cirrusfb_WaitBLT(regbase); +static void cirrusfb_set_blitter(u8 __iomem *regbase, + u_short nwidth, u_short nheight, + u_long nsrc, u_long ndest, + u_short bltmode, u_short line_length) +{ /* pitch: set to line_length */ /* dest pitch low */ vga_wgfx(regbase, CL_GR24, line_length & 0xff); @@ -2813,91 +2671,91 @@ static void cirrusfb_BitBLT(u8 __iomem *regbase, int bits_per_pixel, /* and finally: GO! */ vga_wgfx(regbase, CL_GR31, 0x02); /* BLT Start/status */ - - DPRINTK("EXIT\n"); } /******************************************************************* - cirrusfb_RectFill() + cirrusfb_BitBLT() - perform accelerated rectangle fill + perform accelerated "scrolling" ********************************************************************/ -static void cirrusfb_RectFill(u8 __iomem *regbase, int bits_per_pixel, - u_short x, u_short y, u_short width, u_short height, - u_char color, u_short line_length) +static void cirrusfb_BitBLT(u8 __iomem *regbase, int bits_per_pixel, + u_short curx, u_short cury, + u_short destx, u_short desty, + u_short width, u_short height, + u_short line_length) { - u_short nwidth, nheight; - u_long ndest; - u_char op; - - DPRINTK("ENTER\n"); - - nwidth = width - 1; - nheight = height - 1; + u_short nwidth = width - 1; + u_short nheight = height - 1; + u_long nsrc, ndest; + u_char bltmode; - ndest = (y * line_length) + x; + bltmode = 0x00; + /* if source adr < dest addr, do the Blt backwards */ + if (cury <= desty) { + if (cury == desty) { + /* if src and dest are on the same line, check x */ + if (curx < destx) + bltmode |= 0x01; + } else + bltmode |= 0x01; + } + /* standard case: forward blitting */ + nsrc = (cury * line_length) + curx; + ndest = (desty * line_length) + destx; + if (bltmode) { + /* this means start addresses are at the end, + * counting backwards + */ + nsrc += nheight * line_length + nwidth; + ndest += nheight * line_length + nwidth; + } cirrusfb_WaitBLT(regbase); - /* pitch: set to line_length */ - vga_wgfx(regbase, CL_GR24, line_length & 0xff); /* dest pitch low */ - vga_wgfx(regbase, CL_GR25, line_length >> 8); /* dest pitch hi */ - vga_wgfx(regbase, CL_GR26, line_length & 0xff); /* source pitch low */ - vga_wgfx(regbase, CL_GR27, line_length >> 8); /* source pitch hi */ + cirrusfb_set_blitter(regbase, nwidth, nheight, + nsrc, ndest, bltmode, line_length); +} - /* BLT width: actual number of pixels - 1 */ - vga_wgfx(regbase, CL_GR20, nwidth & 0xff); /* BLT width low */ - vga_wgfx(regbase, CL_GR21, nwidth >> 8); /* BLT width hi */ +/******************************************************************* + cirrusfb_RectFill() - /* BLT height: actual number of lines -1 */ - vga_wgfx(regbase, CL_GR22, nheight & 0xff); /* BLT height low */ - vga_wgfx(regbase, CL_GR23, nheight >> 8); /* BLT width hi */ + perform accelerated rectangle fill +********************************************************************/ - /* BLT destination */ - /* BLT dest low */ - vga_wgfx(regbase, CL_GR28, (u_char) (ndest & 0xff)); - /* BLT dest mid */ - vga_wgfx(regbase, CL_GR29, (u_char) (ndest >> 8)); - /* BLT dest hi */ - vga_wgfx(regbase, CL_GR2A, (u_char) (ndest >> 16)); +static void cirrusfb_RectFill(u8 __iomem *regbase, int bits_per_pixel, + u_short x, u_short y, u_short width, u_short height, + u32 fg_color, u32 bg_color, u_short line_length, + u_char blitmode) +{ + u_long ndest = (y * line_length) + x; + u_char op; - /* BLT source: set to 0 (is a dummy here anyway) */ - vga_wgfx(regbase, CL_GR2C, 0x00); /* BLT src low */ - vga_wgfx(regbase, CL_GR2D, 0x00); /* BLT src mid */ - vga_wgfx(regbase, CL_GR2E, 0x00); /* BLT src hi */ + cirrusfb_WaitBLT(regbase); /* This is a ColorExpand Blt, using the */ /* same color for foreground and background */ - vga_wgfx(regbase, VGA_GFX_SR_VALUE, color); /* foreground color */ - vga_wgfx(regbase, VGA_GFX_SR_ENABLE, color); /* background color */ - - op = 0xc0; - if (bits_per_pixel == 16) { - vga_wgfx(regbase, CL_GR10, color); /* foreground color */ - vga_wgfx(regbase, CL_GR11, color); /* background color */ - op = 0x50; - op = 0xd0; - } else if (bits_per_pixel == 32) { - vga_wgfx(regbase, CL_GR10, color); /* foreground color */ - vga_wgfx(regbase, CL_GR11, color); /* background color */ - vga_wgfx(regbase, CL_GR12, color); /* foreground color */ - vga_wgfx(regbase, CL_GR13, color); /* background color */ - vga_wgfx(regbase, CL_GR14, 0); /* foreground color */ - vga_wgfx(regbase, CL_GR15, 0); /* background color */ - op = 0x50; - op = 0xf0; - } - /* BLT mode: color expand, Enable 8x8 copy (faster?) */ - vga_wgfx(regbase, CL_GR30, op); /* BLT mode */ - - /* BLT ROP: SrcCopy */ - vga_wgfx(regbase, CL_GR32, 0x0d); /* BLT ROP */ - - /* and finally: GO! */ - vga_wgfx(regbase, CL_GR31, 0x02); /* BLT Start/status */ - - DPRINTK("EXIT\n"); + vga_wgfx(regbase, VGA_GFX_SR_VALUE, bg_color); + vga_wgfx(regbase, VGA_GFX_SR_ENABLE, fg_color); + + op = 0x80; + if (bits_per_pixel >= 16) { + vga_wgfx(regbase, CL_GR10, bg_color >> 8); + vga_wgfx(regbase, CL_GR11, fg_color >> 8); + op = 0x90; + } + if (bits_per_pixel >= 24) { + vga_wgfx(regbase, CL_GR12, bg_color >> 16); + vga_wgfx(regbase, CL_GR13, fg_color >> 16); + op = 0xa0; + } + if (bits_per_pixel == 32) { + vga_wgfx(regbase, CL_GR14, bg_color >> 24); + vga_wgfx(regbase, CL_GR15, fg_color >> 24); + op = 0xb0; + } + cirrusfb_set_blitter(regbase, width - 1, height - 1, + 0, ndest, op | blitmode, line_length); } /************************************************************************** @@ -2917,8 +2775,6 @@ static void bestclock(long freq, int *nom, int *den, int *div) *den = 0; *div = 0; - DPRINTK("ENTER\n"); - if (freq < 8000) freq = 8000; @@ -2960,12 +2816,6 @@ static void bestclock(long freq, int *nom, int *den, int *div) } } } - - DPRINTK("Best possible values for given frequency:\n"); - DPRINTK(" freq: %ld kHz nom: %d den: %d div: %d\n", - freq, *nom, *den, *div); - - DPRINTK("EXIT\n"); } /* ------------------------------------------------------------------------- @@ -2978,32 +2828,6 @@ static void bestclock(long freq, int *nom, int *den, int *div) #ifdef CIRRUSFB_DEBUG /** - * cirrusfb_dbg_print_byte - * @name: name associated with byte value to be displayed - * @val: byte value to be displayed - * - * DESCRIPTION: - * Display an indented string, along with a hexidecimal byte value, and - * its decoded bits. Bits 7 through 0 are listed in left-to-right - * order. - */ - -static -void cirrusfb_dbg_print_byte(const char *name, unsigned char val) -{ - DPRINTK("%8s = 0x%02X (bits 7-0: %c%c%c%c%c%c%c%c)\n", - name, val, - val & 0x80 ? '1' : '0', - val & 0x40 ? '1' : '0', - val & 0x20 ? '1' : '0', - val & 0x10 ? '1' : '0', - val & 0x08 ? '1' : '0', - val & 0x04 ? '1' : '0', - val & 0x02 ? '1' : '0', - val & 0x01 ? '1' : '0'); -} - -/** * cirrusfb_dbg_print_regs * @base: If using newmmio, the newmmio base address, otherwise %NULL * @reg_class: type of registers to read: %CRT, or %SEQ @@ -3014,9 +2838,9 @@ void cirrusfb_dbg_print_byte(const char *name, unsigned char val) * used at the given @base address to query the information. */ -static -void cirrusfb_dbg_print_regs(caddr_t regbase, - enum cirrusfb_dbg_reg_class reg_class, ...) +static void cirrusfb_dbg_print_regs(struct fb_info *info, + caddr_t regbase, + enum cirrusfb_dbg_reg_class reg_class, ...) { va_list list; unsigned char val = 0; @@ -3042,7 +2866,7 @@ void cirrusfb_dbg_print_regs(caddr_t regbase, break; } - cirrusfb_dbg_print_byte(name, val); + dev_dbg(info->device, "%8s = 0x%02X\n", name, val); name = va_arg(list, char *); } @@ -3051,18 +2875,6 @@ void cirrusfb_dbg_print_regs(caddr_t regbase, } /** - * cirrusfb_dump - * @cirrusfbinfo: - * - * DESCRIPTION: - */ - -static void cirrusfb_dump(void) -{ - cirrusfb_dbg_reg_dump(NULL); -} - -/** * cirrusfb_dbg_reg_dump * @base: If using newmmio, the newmmio base address, otherwise %NULL * @@ -3072,12 +2884,11 @@ static void cirrusfb_dump(void) * used at the given @base address to query the information. */ -static -void cirrusfb_dbg_reg_dump(caddr_t regbase) +static void cirrusfb_dbg_reg_dump(struct fb_info *info, caddr_t regbase) { - DPRINTK("CIRRUSFB VGA CRTC register dump:\n"); + dev_dbg(info->device, "VGA CRTC register dump:\n"); - cirrusfb_dbg_print_regs(regbase, CRT, + cirrusfb_dbg_print_regs(info, regbase, CRT, "CR00", 0x00, "CR01", 0x01, "CR02", 0x02, @@ -3127,11 +2938,11 @@ void cirrusfb_dbg_reg_dump(caddr_t regbase) "CR3F", 0x3F, NULL); - DPRINTK("\n"); + dev_dbg(info->device, "\n"); - DPRINTK("CIRRUSFB VGA SEQ register dump:\n"); + dev_dbg(info->device, "VGA SEQ register dump:\n"); - cirrusfb_dbg_print_regs(regbase, SEQ, + cirrusfb_dbg_print_regs(info, regbase, SEQ, "SR00", 0x00, "SR01", 0x01, "SR02", 0x02, @@ -3160,7 +2971,7 @@ void cirrusfb_dbg_reg_dump(caddr_t regbase) "SR1F", 0x1F, NULL); - DPRINTK("\n"); + dev_dbg(info->device, "\n"); } #endif /* CIRRUSFB_DEBUG */ diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 1657b9608b04..2cd500a304f2 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -2954,8 +2954,11 @@ static int fbcon_fb_unbind(int idx) static int fbcon_fb_unregistered(struct fb_info *info) { - int i, idx = info->node; + int i, idx; + if (!lock_fb_info(info)) + return -ENODEV; + idx = info->node; for (i = first_fb_vc; i <= last_fb_vc; i++) { if (con2fb_map[i] == idx) con2fb_map[i] = -1; @@ -2979,13 +2982,14 @@ static int fbcon_fb_unregistered(struct fb_info *info) } } - if (!num_registered_fb) - unregister_con_driver(&fb_con); - - if (primary_device == idx) primary_device = -1; + unlock_fb_info(info); + + if (!num_registered_fb) + unregister_con_driver(&fb_con); + return 0; } @@ -3021,9 +3025,13 @@ static inline void fbcon_select_primary(struct fb_info *info) static int fbcon_fb_registered(struct fb_info *info) { - int ret = 0, i, idx = info->node; + int ret = 0, i, idx; + if (!lock_fb_info(info)) + return -ENODEV; + idx = info->node; fbcon_select_primary(info); + unlock_fb_info(info); if (info_idx == -1) { for (i = first_fb_vc; i <= last_fb_vc; i++) { @@ -3124,7 +3132,7 @@ static void fbcon_get_requirement(struct fb_info *info, } } -static int fbcon_event_notify(struct notifier_block *self, +static int fbcon_event_notify(struct notifier_block *self, unsigned long action, void *data) { struct fb_event *event = data; @@ -3132,7 +3140,7 @@ static int fbcon_event_notify(struct notifier_block *self, struct fb_videomode *mode; struct fb_con2fbmap *con2fb; struct fb_blit_caps *caps; - int ret = 0; + int idx, ret = 0; /* * ignore all events except driver registration and deregistration @@ -3144,23 +3152,54 @@ static int fbcon_event_notify(struct notifier_block *self, switch(action) { case FB_EVENT_SUSPEND: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_suspended(info); + unlock_fb_info(info); break; case FB_EVENT_RESUME: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_resumed(info); + unlock_fb_info(info); break; case FB_EVENT_MODE_CHANGE: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_modechanged(info); + unlock_fb_info(info); break; case FB_EVENT_MODE_CHANGE_ALL: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_set_all_vcs(info); + unlock_fb_info(info); break; case FB_EVENT_MODE_DELETE: mode = event->data; + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } ret = fbcon_mode_deleted(info, mode); + unlock_fb_info(info); break; case FB_EVENT_FB_UNBIND: - ret = fbcon_fb_unbind(info->node); + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } + idx = info->node; + unlock_fb_info(info); + ret = fbcon_fb_unbind(idx); break; case FB_EVENT_FB_REGISTERED: ret = fbcon_fb_registered(info); @@ -3178,17 +3217,31 @@ static int fbcon_event_notify(struct notifier_block *self, con2fb->framebuffer = con2fb_map[con2fb->console - 1]; break; case FB_EVENT_BLANK: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_fb_blanked(info, *(int *)event->data); + unlock_fb_info(info); break; case FB_EVENT_NEW_MODELIST: + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_new_modelist(info); + unlock_fb_info(info); break; case FB_EVENT_GET_REQ: caps = event->data; + if (!lock_fb_info(info)) { + ret = -ENODEV; + goto done; + } fbcon_get_requirement(info, caps); + unlock_fb_info(info); break; } - done: return ret; } diff --git a/drivers/video/cyblafb.c b/drivers/video/cyblafb.c deleted file mode 100644 index 9704b73135f5..000000000000 --- a/drivers/video/cyblafb.c +++ /dev/null @@ -1,1683 +0,0 @@ -/* - * Frame buffer driver for Trident Cyberblade/i1 graphics core - * - * Copyright 2005 Knut Petersen <Knut_Petersen@t-online.de> - * - * CREDITS: - * tridentfb.c by Jani Monoses - * see files above for further credits - * - */ - -#define CYBLAFB_DEBUG 0 -#define CYBLAFB_KD_GRAPHICS_QUIRK 1 - -#define CYBLAFB_PIXMAPSIZE 8192 - -#include <linux/module.h> -#include <linux/string.h> -#include <linux/fb.h> -#include <linux/init.h> -#include <linux/pci.h> -#include <asm/types.h> -#include <video/cyblafb.h> - -#define VERSION "0.62" - -struct cyblafb_par { - u32 pseudo_pal[16]; - struct fb_ops ops; -}; - -static struct fb_fix_screeninfo cyblafb_fix __devinitdata = { - .id = "CyBla", - .type = FB_TYPE_PACKED_PIXELS, - .xpanstep = 1, - .ypanstep = 1, - .ywrapstep = 1, - .visual = FB_VISUAL_PSEUDOCOLOR, - .accel = FB_ACCEL_NONE, -}; - -static char *mode __devinitdata = NULL; -static int bpp __devinitdata = 8; -static int ref __devinitdata = 75; -static int fp __devinitdata; -static int crt __devinitdata; -static int memsize __devinitdata; - -static int basestride; -static int vesafb; -static int nativex; -static int center; -static int stretch; -static int pciwb = 1; -static int pcirb = 1; -static int pciwr = 1; -static int pcirr = 1; -static int disabled; -static int verbosity; -static int displaytype; - -static void __iomem *io_virt; // iospace virtual memory address - -module_param(mode, charp, 0); -module_param(bpp, int, 0); -module_param(ref, int, 0); -module_param(fp, int, 0); -module_param(crt, int, 0); -module_param(nativex, int, 0); -module_param(center, int, 0); -module_param(stretch, int, 0); -module_param(pciwb, int, 0); -module_param(pcirb, int, 0); -module_param(pciwr, int, 0); -module_param(pcirr, int, 0); -module_param(memsize, int, 0); -module_param(verbosity, int, 0); - -//========================================= -// -// Well, we have to fix the upper layers. -// Until this has been done, we work around -// the bugs. -// -//========================================= - -#if (CYBLAFB_KD_GRAPHICS_QUIRK && CYBLAFB_DEBUG) - if (disabled) { \ - printk("********\n");\ - dump_stack();\ - return val;\ - } - -#elif CYBLAFB_KD_GRAPHICS_QUIRK -#define KD_GRAPHICS_RETURN(val)\ - if (disabled) {\ - return val;\ - } -#else -#define KD_GRAPHICS_RETURN(val) -#endif - -//========================================= -// -// Port access macros for memory mapped io -// -//========================================= - -#define out8(r, v) writeb(v, io_virt + r) -#define out32(r, v) writel(v, io_virt + r) -#define in8(r) readb(io_virt + r) -#define in32(r) readl(io_virt + r) - -//====================================== -// -// Hardware access inline functions -// -//====================================== - -static inline u8 read3X4(u32 reg) -{ - out8(0x3D4, reg); - return in8(0x3D5); -} - -static inline u8 read3C4(u32 reg) -{ - out8(0x3C4, reg); - return in8(0x3C5); -} - -static inline u8 read3CE(u32 reg) -{ - out8(0x3CE, reg); - return in8(0x3CF); -} - -static inline void write3X4(u32 reg, u8 val) -{ - out8(0x3D4, reg); - out8(0x3D5, val); -} - -static inline void write3C4(u32 reg, u8 val) -{ - out8(0x3C4, reg); - out8(0x3C5, val); -} - -static inline void write3CE(u32 reg, u8 val) -{ - out8(0x3CE, reg); - out8(0x3CF, val); -} - -static inline void write3C0(u32 reg, u8 val) -{ - in8(0x3DA); // read to reset index - out8(0x3C0, reg); - out8(0x3C0, val); -} - -//================================================= -// -// Enable memory mapped io and unprotect registers -// -//================================================= - -static void enable_mmio(void) -{ - u8 tmp; - - outb(0x0B, 0x3C4); - inb(0x3C5); // Set NEW mode - outb(SR0E, 0x3C4); // write enable a lot of extended ports - outb(0x80, 0x3C5); - - outb(SR11, 0x3C4); // write enable those extended ports that - outb(0x87, 0x3C5); // are not affected by SR0E_New - - outb(CR1E, 0x3d4); // clear write protect bit for port 0x3c2 - tmp = inb(0x3d5) & 0xBF; - outb(CR1E, 0x3d4); - outb(tmp, 0x3d5); - - outb(CR39, 0x3D4); - outb(inb(0x3D5) | 0x01, 0x3D5); // Enable mmio -} - -//================================================= -// -// Set pixel clock VCLK1 -// - multipliers set elswhere -// - freq in units of 0.01 MHz -// -// Hardware bug: SR18 >= 250 is broken for the -// cyberblade/i1 -// -//================================================= - -static void set_vclk(struct cyblafb_par *par, int freq) -{ - u32 m, n, k; - int f, fi, d, di; - u8 lo = 0, hi = 0; - - d = 2000; - k = freq >= 10000 ? 0 : freq >= 5000 ? 1 : freq >= 2500 ? 2 : 3; - for (m = 0; m < 64; m++) - for (n = 0; n < 250; n++) { - fi = (int)(((5864727 * (n + 8)) / - ((m + 2) * (1 << k))) >> 12); - if ((di = abs(fi - freq)) < d) { - d = di; - f = fi; - lo = (u8) n; - hi = (u8) ((k << 6) | m); - } - } - write3C4(SR19, hi); - write3C4(SR18, lo); - if (verbosity > 0) - output("pixclock = %d.%02d MHz, k/m/n %x %x %x\n", - freq / 100, freq % 100, (hi & 0xc0) >> 6, hi & 0x3f, lo); -} - -//================================================ -// -// Cyberblade specific Graphics Engine (GE) setup -// -//================================================ - -static void cyblafb_setup_GE(int pitch, int bpp) -{ - KD_GRAPHICS_RETURN(); - - switch (bpp) { - case 8: - basestride = ((pitch >> 3) << 20) | (0 << 29); - break; - case 15: - basestride = ((pitch >> 3) << 20) | (5 << 29); - break; - case 16: - basestride = ((pitch >> 3) << 20) | (1 << 29); - break; - case 24: - case 32: - basestride = ((pitch >> 3) << 20) | (2 << 29); - break; - } - - write3X4(CR36, 0x90); // reset GE - write3X4(CR36, 0x80); // enable GE - out32(GE24, 1 << 7); // reset all GE pointers by toggling - out32(GE24, 0); // d7 of GE24 - write3X4(CR2D, 0x00); // GE Timinigs, no delays - out32(GE6C, 0); // Pattern and Style, p 129, ok -} - -//===================================================================== -// -// Cyberblade specific syncing -// -// A timeout might be caused by disabled mmio. -// Cause: -// - bit CR39 & 1 == 0 upon return, X trident driver bug -// - kdm bug (KD_GRAPHICS not set on first switch) -// - kernel design flaw (it believes in the correctness -// of kdm/X -// First we try to sync ignoring that problem, as most of the -// time that will succeed immediately and the enable_mmio() -// would only degrade performance. -// -//===================================================================== - -static int cyblafb_sync(struct fb_info *info) -{ - u32 status, i = 100000; - - KD_GRAPHICS_RETURN(0); - - while (((status = in32(GE20)) & 0xFe800000) && i != 0) - i--; - - if (i == 0) { - enable_mmio(); - i = 1000000; - while (((status = in32(GE20)) & 0xFA800000) && i != 0) - i--; - if (i == 0) { - output("GE Timeout, status: %x\n", status); - if (status & 0x80000000) - output("Bresenham Engine : Busy\n"); - if (status & 0x40000000) - output("Setup Engine : Busy\n"); - if (status & 0x20000000) - output("SP / DPE : Busy\n"); - if (status & 0x10000000) - output("Memory Interface : Busy\n"); - if (status & 0x08000000) - output("Com Lst Proc : Busy\n"); - if (status & 0x04000000) - output("Block Write : Busy\n"); - if (status & 0x02000000) - output("Command Buffer : Full\n"); - if (status & 0x01000000) - output("RESERVED : Busy\n"); - if (status & 0x00800000) - output("PCI Write Buffer : Busy\n"); - cyblafb_setup_GE(info->var.xres, - info->var.bits_per_pixel); - } - } - - return 0; -} - -//============================== -// -// Cyberblade specific fillrect -// -//============================== - -static void cyblafb_fillrect(struct fb_info *info, const struct fb_fillrect *fr) -{ - u32 bpp = info->var.bits_per_pixel, col, desty, height; - - KD_GRAPHICS_RETURN(); - - switch (bpp) { - default: - case 8: - col = fr->color; - col |= col << 8; - col |= col << 16; - break; - case 16: - col = ((u32 *) (info->pseudo_palette))[fr->color]; - col |= col << 16; - break; - case 32: - col = ((u32 *) (info->pseudo_palette))[fr->color]; - break; - } - - desty = fr->dy; - height = fr->height; - while (height) { - out32(GEB8, basestride | ((desty * info->var.xres_virtual * - bpp) >> 6)); - out32(GE60, col); - out32(GE48, fr->rop ? 0x66 : ROP_S); - out32(GE44, 0x20000000 | 1 << 19 | 1 << 4 | 2 << 2); - out32(GE08, point(fr->dx, 0)); - out32(GE0C, point(fr->dx + fr->width - 1, - height > 4096 ? 4095 : height - 1)); - if (likely(height <= 4096)) - return; - desty += 4096; - height -= 4096; - } -} - -//================================================ -// -// Cyberblade specific copyarea -// -// This function silently assumes that it never -// will be called with width or height exceeding -// 4096. -// -//================================================ - -static void cyblafb_copyarea(struct fb_info *info, const struct fb_copyarea *ca) -{ - u32 s1, s2, d1, d2, direction; - - KD_GRAPHICS_RETURN(); - - s1 = point(ca->sx, 0); - s2 = point(ca->sx + ca->width - 1, ca->height - 1); - d1 = point(ca->dx, 0); - d2 = point(ca->dx + ca->width - 1, ca->height - 1); - - if ((ca->sy > ca->dy) || ((ca->sy == ca->dy) && (ca->sx > ca->dx))) - direction = 0; - else - direction = 2; - - out32(GEB8, basestride | ((ca->dy * info->var.xres_virtual * - info->var.bits_per_pixel) >> 6)); - out32(GEC8, basestride | ((ca->sy * info->var.xres_virtual * - info->var.bits_per_pixel) >> 6)); - out32(GE44, 0xa0000000 | 1 << 19 | 1 << 2 | direction); - out32(GE00, direction ? s2 : s1); - out32(GE04, direction ? s1 : s2); - out32(GE08, direction ? d2 : d1); - out32(GE0C, direction ? d1 : d2); -} - -//======================================================================= -// -// Cyberblade specific imageblit -// -// Accelerated for the most usual case, blitting 1 - bit deep -// character images. Everything else is passed to the generic imageblit -// unless it is so insane that it is better to printk an alert. -// -// Hardware bug: _Never_ blit across pixel column 2048, that will lock -// the system. We split those blit requests into three blitting -// operations. -// -//======================================================================= - -static void cyblafb_imageblit(struct fb_info *info, - const struct fb_image *image) -{ - u32 fgcol, bgcol; - u32 *pd = (u32 *) image->data; - u32 bpp = info->var.bits_per_pixel; - - KD_GRAPHICS_RETURN(); - - // Used only for drawing the penguine (image->depth > 1) - if (image->depth != 1) { - cfb_imageblit(info, image); - return; - } - // That should never happen, but it would be fatal - if (image->width == 0 || image->height == 0) { - output("imageblit: width/height 0 detected\n"); - return; - } - - if (info->fix.visual == FB_VISUAL_TRUECOLOR || - info->fix.visual == FB_VISUAL_DIRECTCOLOR) { - fgcol = ((u32 *) (info->pseudo_palette))[image->fg_color]; - bgcol = ((u32 *) (info->pseudo_palette))[image->bg_color]; - } else { - fgcol = image->fg_color; - bgcol = image->bg_color; - } - - switch (bpp) { - case 8: - fgcol |= fgcol << 8; - bgcol |= bgcol << 8; - case 16: - fgcol |= fgcol << 16; - bgcol |= bgcol << 16; - default: - break; - } - - out32(GEB8, basestride | ((image->dy * info->var.xres_virtual * - bpp) >> 6)); - out32(GE60, fgcol); - out32(GE64, bgcol); - - if (!(image->dx < 2048 && (image->dx + image->width - 1) >= 2048)) { - u32 dds = ((image->width + 31) >> 5) * image->height; - out32(GE44, 0xa0000000 | 1 << 20 | 1 << 19); - out32(GE08, point(image->dx, 0)); - out32(GE0C, point(image->dx + image->width - 1, - image->height - 1)); - while (dds--) - out32(GE9C, *pd++); - } else { - int i, j; - u32 ddstotal = (image->width + 31) >> 5; - u32 ddsleft = (2048 - image->dx + 31) >> 5; - u32 skipleft = ddstotal - ddsleft; - - out32(GE44, 0xa0000000 | 1 << 20 | 1 << 19); - out32(GE08, point(image->dx, 0)); - out32(GE0C, point(2048 - 1, image->height - 1)); - for (i = 0; i < image->height; i++) { - for (j = 0; j < ddsleft; j++) - out32(GE9C, *pd++); - pd += skipleft; - } - - if (image->dx % 32) { - out32(GE44, 0xa0000000 | 1 << 20 | 1 << 19); - out32(GE08, point(2048, 0)); - if (image->width > ddsleft << 5) - out32(GE0C, point(image->dx + (ddsleft << 5) - - 1, image->height - 1)); - else - out32(GE0C, point(image->dx + image->width - 1, - image->height - 1)); - pd = ((u32 *) image->data) + ddstotal - skipleft - 1; - for (i = 0; i < image->height; i++) { - out32(GE9C, swab32(swab32(*pd) << ((32 - - (image->dx & 31)) & 31))); - pd += ddstotal; - } - } - - if (skipleft) { - out32(GE44, 0xa0000000 | 1 << 20 | 1 << 19); - out32(GE08, point(image->dx + (ddsleft << 5), 0)); - out32(GE0C, point(image->dx + image->width - 1, - image->height - 1)); - pd = (u32 *) image->data; - for (i = 0; i < image->height; i++) { - pd += ddsleft; - for (j = 0; j < skipleft; j++) - out32(GE9C, *pd++); - } - } - } -} - -//========================================================== -// -// Check if video mode is acceptable. We change var->??? if -// video mode is slightly off or return error otherwise. -// info->??? must not be changed! -// -//========================================================== - -static int cyblafb_check_var(struct fb_var_screeninfo *var, - struct fb_info *info) -{ - int bpp = var->bits_per_pixel; - - // - // we try to support 8, 16, 24 and 32 bpp modes, - // default to 8 - // - // there is a 24 bpp mode, but for now we change requests to 32 bpp - // (This is what tridentfb does ... will be changed in the future) - // - // - if (bpp % 8 != 0 || bpp < 8 || bpp > 32) - bpp = 8; - if (bpp == 24) - bpp = var->bits_per_pixel = 32; - - // - // interlaced modes are broken, fail if one is requested - // - if (var->vmode & FB_VMODE_INTERLACED) - return -EINVAL; - - // - // fail if requested resolution is higher than physical - // flatpanel resolution - // - if ((displaytype == DISPLAY_FP) && nativex && var->xres > nativex) - return -EINVAL; - - // - // we do not allow vclk to exceed 230 MHz. If the requested - // vclk is too high, we default to 200 MHz - // - if ((bpp == 32 ? 200000000 : 100000000) / var->pixclock > 23000) - var->pixclock = (bpp == 32 ? 200000000 : 100000000) / 20000; - - // - // enforce (h|v)sync_len limits - // - var->hsync_len &= ~7; - if(var->hsync_len > 248) - var->hsync_len = 248; - - var->vsync_len &= 15; - - // - // Enforce horizontal and vertical hardware limits. - // 1600x1200 is mentioned as a maximum, but higher resolutions could - // work with slow refresh, small margins and short sync. - // - var->xres &= ~7; - - if (((var->xres + var->left_margin + var->right_margin + - var->hsync_len) > (bpp == 32 ? 2040 : 4088)) || - ((var->yres + var->upper_margin + var->lower_margin + - var->vsync_len) > 2047)) - return -EINVAL; - - if ((var->xres > 1600) || (var->yres > 1200)) - output("Mode %dx%d exceeds documented limits.\n", - var->xres, var->yres); - // - // try to be smart about (x|y)res_virtual problems. - // - if (var->xres > var->xres_virtual) - var->xres_virtual = var->xres; - if (var->yres > var->yres_virtual) - var->yres_virtual = var->yres; - - if (bpp == 8 || bpp == 16) { - if (var->xres_virtual > 4088) - var->xres_virtual = 4088; - } else { - if (var->xres_virtual > 2040) - var->xres_virtual = 2040; - } - var->xres_virtual &= ~7; - while (var->xres_virtual * var->yres_virtual * bpp / 8 > - info->fix.smem_len) { - if (var->yres_virtual > var->yres) - var->yres_virtual--; - else if (var->xres_virtual > var->xres) - var->xres_virtual -= 8; - else - return -EINVAL; - } - - switch (bpp) { - case 8: - var->red.offset = 0; - var->green.offset = 0; - var->blue.offset = 0; - var->red.length = 6; - var->green.length = 6; - var->blue.length = 6; - break; - case 16: - var->red.offset = 11; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 6; - var->blue.length = 5; - break; - case 32: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - break; - default: - return -EINVAL; - } - - return 0; -} - -//===================================================================== -// -// Pan the display -// -// The datasheets defines crt start address to be 20 bits wide and -// to be programmed to CR0C, CR0D, CR1E and CR27. Actually there is -// CR2B[5] as an undocumented extension bit. Epia BIOS 2.07 does use -// it, so it is also safe to be used here. BTW: datasheet CR0E on page -// 90 really is CR1E, the real CRE is documented on page 72. -// -// BUT: -// -// As of internal version 0.60 we do not use vga panning any longer. -// Vga panning did not allow us the use of all available video memory -// and thus prevented ywrap scrolling. We do use the "right view" -// register now. -// -// -//===================================================================== - -static int cyblafb_pan_display(struct fb_var_screeninfo *var, - struct fb_info *info) -{ - KD_GRAPHICS_RETURN(0); - - info->var.xoffset = var->xoffset; - info->var.yoffset = var->yoffset; - out32(GE10, 0x80000000 | ((var->xoffset + (var->yoffset * - var->xres_virtual)) * var->bits_per_pixel / 32)); - return 0; -} - -//============================================ -// -// This will really help in case of a bug ... -// dump most gaphics core registers. -// -//============================================ - -static void regdump(struct cyblafb_par *par) -{ - int i; - - if (verbosity < 2) - return; - - printk("\n"); - for (i = 0; i <= 0xff; i++) { - outb(i, 0x3d4); - printk("CR%02x=%02x ", i, inb(0x3d5)); - if (i % 16 == 15) - printk("\n"); - } - - outb(0x30, 0x3ce); - outb(inb(0x3cf) | 0x40, 0x3cf); - for (i = 0; i <= 0x1f; i++) { - if (i == 0 || (i > 2 && i < 8) || i == 0x10 || i == 0x11 - || i == 0x16) { - outb(i, 0x3d4); - printk("CR%02x=%02x ", i, inb(0x3d5)); - } else - printk("------- "); - if (i % 16 == 15) - printk("\n"); - } - outb(0x30, 0x3ce); - outb(inb(0x3cf) & 0xbf, 0x3cf); - - printk("\n"); - for (i = 0; i <= 0x7f; i++) { - outb(i, 0x3ce); - printk("GR%02x=%02x ", i, inb(0x3cf)); - if (i % 16 == 15) - printk("\n"); - } - - printk("\n"); - for (i = 0; i <= 0xff; i++) { - outb(i, 0x3c4); - printk("SR%02x=%02x ", i, inb(0x3c5)); - if (i % 16 == 15) - printk("\n"); - } - - printk("\n"); - for (i = 0; i <= 0x1F; i++) { - inb(0x3da); // next access is index! - outb(i, 0x3c0); - printk("AR%02x=%02x ", i, inb(0x3c1)); - if (i % 16 == 15) - printk("\n"); - } - printk("\n"); - - inb(0x3DA); // reset internal flag to 3c0 index - outb(0x20, 0x3C0); // enable attr - - return; -} - -//======================================================================= -// -// Save State -// -// This function is called while a switch to KD_TEXT is in progress, -// before any of the other functions are called. -// -//======================================================================= - -static void cyblafb_save_state(struct fb_info *info) -{ - struct cyblafb_par *par = info->par; - if (verbosity > 0) - output("Switching to KD_TEXT\n"); - disabled = 0; - regdump(par); - enable_mmio(); - return; -} - -//======================================================================= -// -// Restore State -// -// This function is called while a switch to KD_GRAPHICS is in progress, -// We have to turn on vga style panning registers again because the -// trident driver of X does not know about GE10. -// -//======================================================================= - -static void cyblafb_restore_state(struct fb_info *info) -{ - if (verbosity > 0) - output("Switching to KD_GRAPHICS\n"); - out32(GE10, 0); - disabled = 1; - return; -} - -//====================================== -// -// Set hardware to requested video mode -// -//====================================== - -static int cyblafb_set_par(struct fb_info *info) -{ - struct cyblafb_par *par = info->par; - u32 htotal, hdispend, hsyncstart, hsyncend, hblankstart, - hblankend, preendfetch, vtotal, vdispend, vsyncstart, - vsyncend, vblankstart, vblankend; - struct fb_var_screeninfo *var = &info->var; - int bpp = var->bits_per_pixel; - int i; - - KD_GRAPHICS_RETURN(0); - - if (verbosity > 0) - output("Switching to new mode: " - "fbset -g %d %d %d %d %d -t %d %d %d %d %d %d %d\n", - var->xres, var->yres, var->xres_virtual, - var->yres_virtual, var->bits_per_pixel, var->pixclock, - var->left_margin, var->right_margin, var->upper_margin, - var->lower_margin, var->hsync_len, var->vsync_len); - - htotal = (var->xres + var->left_margin + var->right_margin + - var->hsync_len) / 8 - 5; - hdispend = var->xres / 8 - 1; - hsyncstart = (var->xres + var->right_margin) / 8; - hsyncend = var->hsync_len / 8; - hblankstart = hdispend + 1; - hblankend = htotal + 3; // should be htotal + 5, bios does it this way - preendfetch = ((var->xres >> 3) + 1) * ((bpp + 1) >> 3); - - vtotal = var->yres + var->upper_margin + var->lower_margin + - var->vsync_len - 2; - vdispend = var->yres - 1; - vsyncstart = var->yres + var->lower_margin; - vblankstart = var->yres; - vblankend = vtotal; // should be vtotal + 2, but bios does it this way - vsyncend = var->vsync_len; - - enable_mmio(); // necessary! ... check X ... - - write3X4(CR11, read3X4(CR11) & 0x7F); // unlock cr00 .. cr07 - - write3CE(GR30, 8); - - if ((displaytype == DISPLAY_FP) && var->xres < nativex) { - - // stretch or center ? - - out8(0x3C2, 0xEB); - - write3CE(GR30, read3CE(GR30) | 0x81); // shadow mode on - - if (center) { - write3CE(GR52, (read3CE(GR52) & 0x7C) | 0x80); - write3CE(GR53, (read3CE(GR53) & 0x7C) | 0x80); - } else if (stretch) { - write3CE(GR5D, 0); - write3CE(GR52, (read3CE(GR52) & 0x7C) | 1); - write3CE(GR53, (read3CE(GR53) & 0x7C) | 1); - } - - } else { - out8(0x3C2, 0x2B); - write3CE(GR30, 8); - } - - // - // Setup CRxx regs - // - - write3X4(CR00, htotal & 0xFF); - write3X4(CR01, hdispend & 0xFF); - write3X4(CR02, hblankstart & 0xFF); - write3X4(CR03, hblankend & 0x1F); - write3X4(CR04, hsyncstart & 0xFF); - write3X4(CR05, (hsyncend & 0x1F) | ((hblankend & 0x20) << 2)); - write3X4(CR06, vtotal & 0xFF); - write3X4(CR07, (vtotal & 0x100) >> 8 | - (vdispend & 0x100) >> 7 | - (vsyncstart & 0x100) >> 6 | - (vblankstart & 0x100) >> 5 | - 0x10 | - (vtotal & 0x200) >> 4 | - (vdispend & 0x200) >> 3 | (vsyncstart & 0x200) >> 2); - write3X4(CR08, 0); - write3X4(CR09, (vblankstart & 0x200) >> 4 | 0x40 | // FIX !!! - ((info->var.vmode & FB_VMODE_DOUBLE) ? 0x80 : 0)); - write3X4(CR0A, 0); // Init to some reasonable default - write3X4(CR0B, 0); // Init to some reasonable default - write3X4(CR0C, 0); // Offset 0 - write3X4(CR0D, 0); // Offset 0 - write3X4(CR0E, 0); // Init to some reasonable default - write3X4(CR0F, 0); // Init to some reasonable default - write3X4(CR10, vsyncstart & 0xFF); - write3X4(CR11, (vsyncend & 0x0F)); - write3X4(CR12, vdispend & 0xFF); - write3X4(CR13, ((info->var.xres_virtual * bpp) / (4 * 16)) & 0xFF); - write3X4(CR14, 0x40); // double word mode - write3X4(CR15, vblankstart & 0xFF); - write3X4(CR16, vblankend & 0xFF); - write3X4(CR17, 0xE3); - write3X4(CR18, 0xFF); - // CR19: needed for interlaced modes ... ignore it for now - write3X4(CR1A, 0x07); // Arbitration Control Counter 1 - write3X4(CR1B, 0x07); // Arbitration Control Counter 2 - write3X4(CR1C, 0x07); // Arbitration Control Counter 3 - write3X4(CR1D, 0x00); // Don't know, doesn't hurt ; -) - write3X4(CR1E, (info->var.vmode & FB_VMODE_INTERLACED) ? 0x84 : 0x80); - // CR1F: do not set, contains BIOS info about memsize - write3X4(CR20, 0x20); // enabe wr buf, disable 16bit planar mode - write3X4(CR21, 0x20); // enable linear memory access - // CR22: RO cpu latch readback - // CR23: ??? - // CR24: RO AR flag state - // CR25: RAMDAC rw timing, pclk buffer tristate control ???? - // CR26: ??? - write3X4(CR27, (vdispend & 0x400) >> 6 | - (vsyncstart & 0x400) >> 5 | - (vblankstart & 0x400) >> 4 | - (vtotal & 0x400) >> 3 | - 0x8); - // CR28: ??? - write3X4(CR29, (read3X4(CR29) & 0xCF) | ((((info->var.xres_virtual * - bpp) / (4 * 16)) & 0x300) >> 4)); - write3X4(CR2A, read3X4(CR2A) | 0x40); - write3X4(CR2B, (htotal & 0x100) >> 8 | - (hdispend & 0x100) >> 7 | - // (0x00 & 0x100) >> 6 | hinterlace para bit 8 ??? - (hsyncstart & 0x100) >> 5 | - (hblankstart & 0x100) >> 4); - // CR2C: ??? - // CR2D: initialized in cyblafb_setup_GE() - write3X4(CR2F, 0x92); // conservative, better signal quality - // CR30: reserved - // CR31: reserved - // CR32: reserved - // CR33: reserved - // CR34: disabled in CR36 - // CR35: disabled in CR36 - // CR36: initialized in cyblafb_setup_GE - // CR37: i2c, ignore for now - write3X4(CR38, (bpp == 8) ? 0x00 : // - (bpp == 16) ? 0x05 : // highcolor - (bpp == 24) ? 0x29 : // packed 24bit truecolor - (bpp == 32) ? 0x09 : 0); // truecolor, 16 bit pixelbus - write3X4(CR39, 0x01 | // MMIO enable - (pcirb ? 0x02 : 0) | // pci read burst enable - (pciwb ? 0x04 : 0)); // pci write burst enable - write3X4(CR55, 0x1F | // pci clocks * 2 for STOP# during 1st data phase - (pcirr ? 0x40 : 0) | // pci read retry enable - (pciwr ? 0x80 : 0)); // pci write retry enable - write3X4(CR56, preendfetch >> 8 < 2 ? (preendfetch >> 8 & 0x01) | 2 - : 0); - write3X4(CR57, preendfetch >> 8 < 2 ? preendfetch & 0xff : 0); - write3X4(CR58, 0x82); // Bios does this .... don't know more - // - // Setup SRxx regs - // - write3C4(SR00, 3); - write3C4(SR01, 1); //set char clock 8 dots wide - write3C4(SR02, 0x0F); //enable 4 maps needed in chain4 mode - write3C4(SR03, 0); //no character map select - write3C4(SR04, 0x0E); //memory mode: ext mem, even, chain4 - - out8(0x3C4, 0x0b); - in8(0x3C5); // Set NEW mode - write3C4(SR0D, 0x00); // test ... check - - set_vclk(par, (bpp == 32 ? 200000000 : 100000000) - / info->var.pixclock); //SR18, SR19 - - // - // Setup GRxx regs - // - write3CE(GR00, 0x00); // test ... check - write3CE(GR01, 0x00); // test ... check - write3CE(GR02, 0x00); // test ... check - write3CE(GR03, 0x00); // test ... check - write3CE(GR04, 0x00); // test ... check - write3CE(GR05, 0x40); // no CGA compat, allow 256 col - write3CE(GR06, 0x05); // graphics mode - write3CE(GR07, 0x0F); // planes? - write3CE(GR08, 0xFF); // test ... check - write3CE(GR0F, (bpp == 32) ? 0x1A : 0x12); // vclk / 2 if 32bpp, chain4 - write3CE(GR20, 0xC0); // test ... check - write3CE(GR2F, 0xA0); // PCLK = VCLK, no skew, - - // - // Setup ARxx regs - // - for (i = 0; i < 0x10; i++) // set AR00 .. AR0f - write3C0(i, i); - write3C0(AR10, 0x41); // graphics mode and support 256 color modes - write3C0(AR12, 0x0F); // planes - write3C0(AR13, 0); // horizontal pel panning - in8(0x3DA); // reset internal flag to 3c0 index - out8(0x3C0, 0x20); // enable attr - - // - // Setup hidden RAMDAC command register - // - in8(0x3C8); // these reads are - in8(0x3C6); // necessary to - in8(0x3C6); // unmask the RAMDAC - in8(0x3C6); // command reg, otherwise - in8(0x3C6); // we would write the pixelmask reg! - out8(0x3C6, (bpp == 8) ? 0x00 : // 256 colors - (bpp == 15) ? 0x10 : // - (bpp == 16) ? 0x30 : // hicolor - (bpp == 24) ? 0xD0 : // truecolor - (bpp == 32) ? 0xD0 : 0); // truecolor - in8(0x3C8); - - // - // GR31 is not mentioned in the datasheet - // - if (displaytype == DISPLAY_FP) - write3CE(GR31, (read3CE(GR31) & 0x8F) | - ((info->var.yres > 1024) ? 0x50 : - (info->var.yres > 768) ? 0x30 : - (info->var.yres > 600) ? 0x20 : - (info->var.yres > 480) ? 0x10 : 0)); - - info->fix.visual = (bpp == 8) ? FB_VISUAL_PSEUDOCOLOR - : FB_VISUAL_TRUECOLOR; - info->fix.line_length = info->var.xres_virtual * (bpp >> 3); - info->cmap.len = (bpp == 8) ? 256 : 16; - - // - // init acceleration engine - // - cyblafb_setup_GE(info->var.xres_virtual, info->var.bits_per_pixel); - - // - // Set/clear flags to allow proper scroll mode selection. - // - if (var->xres == var->xres_virtual) - info->flags &= ~FBINFO_HWACCEL_XPAN; - else - info->flags |= FBINFO_HWACCEL_XPAN; - - if (var->yres == var->yres_virtual) - info->flags &= ~FBINFO_HWACCEL_YPAN; - else - info->flags |= FBINFO_HWACCEL_YPAN; - - if (info->fix.smem_len != - var->xres_virtual * var->yres_virtual * bpp / 8) - info->flags &= ~FBINFO_HWACCEL_YWRAP; - else - info->flags |= FBINFO_HWACCEL_YWRAP; - - regdump(par); - - return 0; -} - -//======================== -// -// Set one color register -// -//======================== - -static int cyblafb_setcolreg(unsigned regno, unsigned red, unsigned green, - unsigned blue, unsigned transp, - struct fb_info *info) -{ - int bpp = info->var.bits_per_pixel; - - KD_GRAPHICS_RETURN(0); - - if (regno >= info->cmap.len) - return 1; - - if (bpp == 8) { - out8(0x3C6, 0xFF); - out8(0x3C8, regno); - out8(0x3C9, red >> 10); - out8(0x3C9, green >> 10); - out8(0x3C9, blue >> 10); - - } else if (regno < 16) { - if (bpp == 16) // RGB 565 - ((u32 *) info->pseudo_palette)[regno] = - (red & 0xF800) | - ((green & 0xFC00) >> 5) | - ((blue & 0xF800) >> 11); - else if (bpp == 32) // ARGB 8888 - ((u32 *) info->pseudo_palette)[regno] = - ((transp & 0xFF00) << 16) | - ((red & 0xFF00) << 8) | - ((green & 0xFF00)) | ((blue & 0xFF00) >> 8); - } - - return 0; -} - -//========================================================== -// -// Try blanking the screen. For flat panels it does nothing -// -//========================================================== - -static int cyblafb_blank(int blank_mode, struct fb_info *info) -{ - unsigned char PMCont, DPMSCont; - - KD_GRAPHICS_RETURN(0); - - if (displaytype == DISPLAY_FP) - return 0; - - out8(0x83C8, 0x04); // DPMS Control - PMCont = in8(0x83C6) & 0xFC; - - DPMSCont = read3CE(GR23) & 0xFC; - - switch (blank_mode) { - case FB_BLANK_UNBLANK: // Screen: On, HSync: On, VSync: On - case FB_BLANK_NORMAL: // Screen: Off, HSync: On, VSync: On - PMCont |= 0x03; - DPMSCont |= 0x00; - break; - case FB_BLANK_HSYNC_SUSPEND: // Screen: Off, HSync: Off, VSync: On - PMCont |= 0x02; - DPMSCont |= 0x01; - break; - case FB_BLANK_VSYNC_SUSPEND: // Screen: Off, HSync: On, VSync: Off - PMCont |= 0x02; - DPMSCont |= 0x02; - break; - case FB_BLANK_POWERDOWN: // Screen: Off, HSync: Off, VSync: Off - PMCont |= 0x00; - DPMSCont |= 0x03; - break; - } - - write3CE(GR23, DPMSCont); - out8(0x83C8, 4); - out8(0x83C6, PMCont); - // - // let fbcon do a softblank for us - // - return (blank_mode == FB_BLANK_NORMAL) ? 1 : 0; -} - -static struct fb_ops cyblafb_ops __devinitdata = { - .owner = THIS_MODULE, - .fb_setcolreg = cyblafb_setcolreg, - .fb_pan_display = cyblafb_pan_display, - .fb_blank = cyblafb_blank, - .fb_check_var = cyblafb_check_var, - .fb_set_par = cyblafb_set_par, - .fb_fillrect = cyblafb_fillrect, - .fb_copyarea = cyblafb_copyarea, - .fb_imageblit = cyblafb_imageblit, - .fb_sync = cyblafb_sync, - .fb_restore_state = cyblafb_restore_state, - .fb_save_state = cyblafb_save_state, -}; - -//========================================================================== -// -// getstartupmode() decides about the inital video mode -// -// There is no reason to use modedb, a lot of video modes there would -// need altered timings to display correctly. So I decided that it is much -// better to provide a limited optimized set of modes plus the option of -// using the mode in effect at startup time (might be selected using the -// vga=??? parameter). After that the user might use fbset to select any -// mode he likes, check_var will not try to alter geometry parameters as -// it would be necessary otherwise. -// -//========================================================================== - -static int __devinit getstartupmode(struct fb_info *info) -{ - u32 htotal, hdispend, hsyncstart, hsyncend, hblankstart, hblankend, - vtotal, vdispend, vsyncstart, vsyncend, vblankstart, vblankend, - cr00, cr01, cr02, cr03, cr04, cr05, cr2b, - cr06, cr07, cr09, cr10, cr11, cr12, cr15, cr16, cr27, - cr38, sr0d, sr18, sr19, gr0f, fi, pxclkdiv, vclkdiv, tmp, i; - - struct modus { - int xres; int vxres; int yres; int vyres; - int bpp; int pxclk; - int left_margin; int right_margin; - int upper_margin; int lower_margin; - int hsync_len; int vsync_len; - } modedb[5] = { - { - 0, 2048, 0, 4096, 0, 0, 0, 0, 0, 0, 0, 0}, { - 640, 2048, 480, 4096, 0, 0, -40, 24, 17, 0, 216, 3}, { - 800, 2048, 600, 4096, 0, 0, 96, 24, 14, 0, 136, 11}, { - 1024, 2048, 768, 4096, 0, 0, 144, 24, 29, 0, 120, 3}, { - 1280, 2048, 1024, 4096, 0, 0, 232, 16, 39, 0, 160, 3} - }; - - outb(0x00, 0x3d4); cr00 = inb(0x3d5); - outb(0x01, 0x3d4); cr01 = inb(0x3d5); - outb(0x02, 0x3d4); cr02 = inb(0x3d5); - outb(0x03, 0x3d4); cr03 = inb(0x3d5); - outb(0x04, 0x3d4); cr04 = inb(0x3d5); - outb(0x05, 0x3d4); cr05 = inb(0x3d5); - outb(0x06, 0x3d4); cr06 = inb(0x3d5); - outb(0x07, 0x3d4); cr07 = inb(0x3d5); - outb(0x09, 0x3d4); cr09 = inb(0x3d5); - outb(0x10, 0x3d4); cr10 = inb(0x3d5); - outb(0x11, 0x3d4); cr11 = inb(0x3d5); - outb(0x12, 0x3d4); cr12 = inb(0x3d5); - outb(0x15, 0x3d4); cr15 = inb(0x3d5); - outb(0x16, 0x3d4); cr16 = inb(0x3d5); - outb(0x27, 0x3d4); cr27 = inb(0x3d5); - outb(0x2b, 0x3d4); cr2b = inb(0x3d5); - outb(0x38, 0x3d4); cr38 = inb(0x3d5); - - outb(0x0b, 0x3c4); - inb(0x3c5); - - outb(0x0d, 0x3c4); sr0d = inb(0x3c5); - outb(0x18, 0x3c4); sr18 = inb(0x3c5); - outb(0x19, 0x3c4); sr19 = inb(0x3c5); - outb(0x0f, 0x3ce); gr0f = inb(0x3cf); - - htotal = cr00 | (cr2b & 0x01) << 8; - hdispend = cr01 | (cr2b & 0x02) << 7; - hblankstart = cr02 | (cr2b & 0x10) << 4; - hblankend = (cr03 & 0x1f) | (cr05 & 0x80) >> 2; - hsyncstart = cr04 | (cr2b & 0x08) << 5; - hsyncend = cr05 & 0x1f; - - modedb[0].xres = hblankstart * 8; - modedb[0].hsync_len = hsyncend * 8; - modedb[0].right_margin = hsyncstart * 8 - modedb[0].xres; - modedb[0].left_margin = (htotal + 5) * 8 - modedb[0].xres - - modedb[0].right_margin - modedb[0].hsync_len; - - vtotal = cr06 | (cr07 & 0x01) << 8 | (cr07 & 0x20) << 4 - | (cr27 & 0x80) << 3; - vdispend = cr12 | (cr07 & 0x02) << 7 | (cr07 & 0x40) << 3 - | (cr27 & 0x10) << 6; - vsyncstart = cr10 | (cr07 & 0x04) << 6 | (cr07 & 0x80) << 2 - | (cr27 & 0x20) << 5; - vsyncend = cr11 & 0x0f; - vblankstart = cr15 | (cr07 & 0x08) << 5 | (cr09 & 0x20) << 4 - | (cr27 & 0x40) << 4; - vblankend = cr16; - - modedb[0].yres = vdispend + 1; - modedb[0].vsync_len = vsyncend; - modedb[0].lower_margin = vsyncstart - modedb[0].yres; - modedb[0].upper_margin = vtotal - modedb[0].yres - - modedb[0].lower_margin - modedb[0].vsync_len + 2; - - tmp = cr38 & 0x3c; - modedb[0].bpp = tmp == 0 ? 8 : tmp == 4 ? 16 : tmp == 28 ? 24 : - tmp == 8 ? 32 : 8; - - fi = ((5864727 * (sr18 + 8)) / - (((sr19 & 0x3f) + 2) * (1 << ((sr19 & 0xc0) >> 6)))) >> 12; - pxclkdiv = ((gr0f & 0x08) >> 3 | (gr0f & 0x40) >> 5) + 1; - tmp = sr0d & 0x06; - vclkdiv = tmp == 0 ? 2 : tmp == 2 ? 4 : tmp == 4 ? 8 : 3; // * 2 ! - modedb[0].pxclk = ((100000000 * pxclkdiv * vclkdiv) >> 1) / fi; - - if (verbosity > 0) - output("detected startup mode: " - "fbset -g %d %d %d ??? %d -t %d %d %d %d %d %d %d\n", - modedb[0].xres, modedb[0].yres, modedb[0].xres, - modedb[0].bpp, modedb[0].pxclk, modedb[0].left_margin, - modedb[0].right_margin, modedb[0].upper_margin, - modedb[0].lower_margin, modedb[0].hsync_len, - modedb[0].vsync_len); - - // - // We use this goto target in case of a failed check_var. No, I really - // do not want to do it in another way! - // - - tryagain: - - i = (mode == NULL) ? 0 : - !strncmp(mode, "640x480", 7) ? 1 : - !strncmp(mode, "800x600", 7) ? 2 : - !strncmp(mode, "1024x768", 8) ? 3 : - !strncmp(mode, "1280x1024", 9) ? 4 : 0; - - ref = (ref < 50) ? 50 : (ref > 85) ? 85 : ref; - - if (i == 0) { - info->var.pixclock = modedb[i].pxclk; - info->var.bits_per_pixel = modedb[i].bpp; - } else { - info->var.pixclock = (100000000 / - ((modedb[i].left_margin + - modedb[i].xres + - modedb[i].right_margin + - modedb[i].hsync_len) * - (modedb[i].upper_margin + - modedb[i].yres + - modedb[i].lower_margin + - modedb[i].vsync_len) * ref / 10000)); - info->var.bits_per_pixel = bpp; - } - - info->var.left_margin = modedb[i].left_margin; - info->var.right_margin = modedb[i].right_margin; - info->var.xres = modedb[i].xres; - if (!(modedb[i].yres == 1280 && modedb[i].bpp == 32)) - info->var.xres_virtual = modedb[i].vxres; - else - info->var.xres_virtual = modedb[i].xres; - info->var.xoffset = 0; - info->var.hsync_len = modedb[i].hsync_len; - info->var.upper_margin = modedb[i].upper_margin; - info->var.yres = modedb[i].yres; - info->var.yres_virtual = modedb[i].vyres; - info->var.yoffset = 0; - info->var.lower_margin = modedb[i].lower_margin; - info->var.vsync_len = modedb[i].vsync_len; - info->var.sync = 0; - info->var.vmode = FB_VMODE_NONINTERLACED; - - if (cyblafb_check_var(&info->var, info)) { - // 640x480 - 8@75 should really never fail. One case would - // be fp == 1 and nativex < 640 ... give up then - if (i == 1 && bpp == 8 && ref == 75) { - output("Can't find a valid mode :-(\n"); - return -EINVAL; - } - // Our detected mode is unlikely to fail. If it does, - // try 640x480 - 8@75 ... - if (i == 0) { - mode = "640x480"; - bpp = 8; - ref = 75; - output("Detected mode failed check_var! " - "Trying 640x480 - 8@75\n"); - goto tryagain; - } - // A specified video mode failed for some reason. - // Try the startup mode first - output("Specified mode '%s' failed check! " - "Falling back to startup mode.\n", mode); - mode = NULL; - goto tryagain; - } - - return 0; -} - -//======================================================== -// -// Detect activated memory size. Undefined values require -// memsize parameter. -// -//======================================================== - -static unsigned int __devinit get_memsize(void) -{ - unsigned char tmp; - unsigned int k; - - if (memsize) - k = memsize * Kb; - else { - tmp = read3X4(CR1F) & 0x0F; - switch (tmp) { - case 0x03: - k = 1 * 1024 * 1024; - break; - case 0x07: - k = 2 * 1024 * 1024; - break; - case 0x0F: - k = 4 * 1024 * 1024; - break; - case 0x04: - k = 8 * 1024 * 1024; - break; - default: - k = 1 * 1024 * 1024; - output("Unknown memory size code %x in CR1F." - " We default to 1 Mb for now, please" - " do provide a memsize parameter!\n", tmp); - } - } - - if (verbosity > 0) - output("framebuffer size = %d Kb\n", k / Kb); - return k; -} - -//========================================================= -// -// Detect if a flat panel monitor connected to the special -// interface is active. Override is possible by fp and crt -// parameters. -// -//========================================================= - -static unsigned int __devinit get_displaytype(void) -{ - if (fp) - return DISPLAY_FP; - if (crt) - return DISPLAY_CRT; - return (read3CE(GR33) & 0x10) ? DISPLAY_FP : DISPLAY_CRT; -} - -//===================================== -// -// Get native resolution of flat panel -// -//===================================== - -static int __devinit get_nativex(void) -{ - int x, y, tmp; - - if (nativex) - return nativex; - - tmp = (read3CE(GR52) >> 4) & 3; - - switch (tmp) { - case 0: x = 1280; y = 1024; - break; - case 2: x = 1024; y = 768; - break; - case 3: x = 800; y = 600; - break; - case 4: x = 1400; y = 1050; - break; - case 1: - default: - x = 640; y = 480; - break; - } - - if (verbosity > 0) - output("%dx%d flat panel found\n", x, y); - return x; -} - -static int __devinit cybla_pci_probe(struct pci_dev *dev, - const struct pci_device_id *id) -{ - struct fb_info *info; - struct cyblafb_par *par; - - info = framebuffer_alloc(sizeof(struct cyblafb_par), &dev->dev); - if (!info) - goto errout_alloc_info; - - info->pixmap.addr = kzalloc(CYBLAFB_PIXMAPSIZE, GFP_KERNEL); - if (!info->pixmap.addr) { - output("allocation of pixmap buffer failed!\n"); - goto errout_alloc_pixmap; - } - info->pixmap.size = CYBLAFB_PIXMAPSIZE - 4; - info->pixmap.buf_align = 4; - info->pixmap.access_align = 32; - info->pixmap.flags = FB_PIXMAP_SYSTEM; - info->pixmap.scan_align = 4; - - par = info->par; - par->ops = cyblafb_ops; - - info->fix = cyblafb_fix; - info->fbops = &par->ops; - info->fix = cyblafb_fix; - - if (pci_enable_device(dev)) { - output("could not enable device!\n"); - goto errout_enable; - } - // might already be requested by vga console or vesafb, - // so we do care about success - if (!request_region(0x3c0, 0x20, "cyblafb")) { - output("region 0x3c0/0x20 already reserved\n"); - vesafb |= 1; - - } - // - // Graphics Engine Registers - // - if (!request_region(GEBase, 0x100, "cyblafb")) { - output("region %#x/0x100 already reserved\n", GEBase); - vesafb |= 2; - } - - regdump(par); - - enable_mmio(); - - // setup MMIO region - info->fix.mmio_start = pci_resource_start(dev, 1); - info->fix.mmio_len = 0x20000; - - if (!request_mem_region(info->fix.mmio_start, - info->fix.mmio_len, "cyblafb")) { - output("request_mem_region failed for mmio region!\n"); - goto errout_mmio_reqmem; - } - - io_virt = ioremap_nocache(info->fix.mmio_start, info->fix.mmio_len); - - if (!io_virt) { - output("ioremap failed for mmio region\n"); - goto errout_mmio_remap; - } - // setup framebuffer memory ... might already be requested - // by vesafb. Not to fail in case of an unsuccessful request - // is useful if both are loaded. - info->fix.smem_start = pci_resource_start(dev, 0); - info->fix.smem_len = get_memsize(); - - if (!request_mem_region(info->fix.smem_start, - info->fix.smem_len, "cyblafb")) { - output("region %#lx/%#x already reserved\n", - info->fix.smem_start, info->fix.smem_len); - vesafb |= 4; - } - - info->screen_base = ioremap_nocache(info->fix.smem_start, - info->fix.smem_len); - - if (!info->screen_base) { - output("ioremap failed for smem region\n"); - goto errout_smem_remap; - } - - displaytype = get_displaytype(); - - if (displaytype == DISPLAY_FP) - nativex = get_nativex(); - - info->flags = FBINFO_DEFAULT - | FBINFO_HWACCEL_COPYAREA - | FBINFO_HWACCEL_FILLRECT - | FBINFO_HWACCEL_IMAGEBLIT - | FBINFO_READS_FAST -// | FBINFO_PARTIAL_PAN_OK - | FBINFO_MISC_ALWAYS_SETPAR; - - info->pseudo_palette = par->pseudo_pal; - - if (getstartupmode(info)) - goto errout_findmode; - - fb_alloc_cmap(&info->cmap, 256, 0); - - if (register_framebuffer(info)) { - output("Could not register CyBla framebuffer\n"); - goto errout_register; - } - - pci_set_drvdata(dev, info); - - // - // normal exit and error paths - // - - return 0; - - errout_register: - errout_findmode: - iounmap(info->screen_base); - errout_smem_remap: - if (!(vesafb & 4)) - release_mem_region(info->fix.smem_start, info->fix.smem_len); - iounmap(io_virt); - errout_mmio_remap: - release_mem_region(info->fix.mmio_start, info->fix.mmio_len); - errout_mmio_reqmem: - if (!(vesafb & 1)) - release_region(0x3c0, 32); - errout_enable: - kfree(info->pixmap.addr); - errout_alloc_pixmap: - framebuffer_release(info); - errout_alloc_info: - output("CyblaFB version %s aborting init.\n", VERSION); - return -ENODEV; -} - -static void __devexit cybla_pci_remove(struct pci_dev *dev) -{ - struct fb_info *info = pci_get_drvdata(dev); - - unregister_framebuffer(info); - iounmap(io_virt); - iounmap(info->screen_base); - if (!(vesafb & 4)) - release_mem_region(info->fix.smem_start, info->fix.smem_len); - release_mem_region(info->fix.mmio_start, info->fix.mmio_len); - fb_dealloc_cmap(&info->cmap); - if (!(vesafb & 2)) - release_region(GEBase, 0x100); - if (!(vesafb & 1)) - release_region(0x3c0, 32); - kfree(info->pixmap.addr); - framebuffer_release(info); - output("CyblaFB version %s normal exit.\n", VERSION); -} - -// -// List of boards that we are trying to support -// -static struct pci_device_id cybla_devices[] = { - {PCI_VENDOR_ID_TRIDENT, CYBERBLADEi1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, - {0,} -}; - -MODULE_DEVICE_TABLE(pci, cybla_devices); - -static struct pci_driver cyblafb_pci_driver = { - .name = "cyblafb", - .id_table = cybla_devices, - .probe = cybla_pci_probe, - .remove = __devexit_p(cybla_pci_remove) -}; - -//============================================================= -// -// kernel command line example: -// -// video=cyblafb:1280x1024, bpp=16, ref=50 ... -// -// modprobe command line example: -// -// modprobe cyblafb mode=1280x1024 bpp=16 ref=50 ... -// -//============================================================= - -static int __devinit cyblafb_init(void) -{ -#ifndef MODULE - char *options = NULL; - char *opt; - - if (fb_get_options("cyblafb", &options)) - return -ENODEV; - - if (options && *options) - while ((opt = strsep(&options, ",")) != NULL) { - if (!*opt) - continue; - else if (!strncmp(opt, "bpp=", 4)) - bpp = simple_strtoul(opt + 4, NULL, 0); - else if (!strncmp(opt, "ref=", 4)) - ref = simple_strtoul(opt + 4, NULL, 0); - else if (!strncmp(opt, "fp", 2)) - displaytype = DISPLAY_FP; - else if (!strncmp(opt, "crt", 3)) - displaytype = DISPLAY_CRT; - else if (!strncmp(opt, "nativex=", 8)) - nativex = simple_strtoul(opt + 8, NULL, 0); - else if (!strncmp(opt, "center", 6)) - center = 1; - else if (!strncmp(opt, "stretch", 7)) - stretch = 1; - else if (!strncmp(opt, "pciwb=", 6)) - pciwb = simple_strtoul(opt + 6, NULL, 0); - else if (!strncmp(opt, "pcirb=", 6)) - pcirb = simple_strtoul(opt + 6, NULL, 0); - else if (!strncmp(opt, "pciwr=", 6)) - pciwr = simple_strtoul(opt + 6, NULL, 0); - else if (!strncmp(opt, "pcirr=", 6)) - pcirr = simple_strtoul(opt + 6, NULL, 0); - else if (!strncmp(opt, "memsize=", 8)) - memsize = simple_strtoul(opt + 8, NULL, 0); - else if (!strncmp(opt, "verbosity=", 10)) - verbosity = simple_strtoul(opt + 10, NULL, 0); - else - mode = opt; - } -#endif - output("CyblaFB version %s initializing\n", VERSION); - return pci_register_driver(&cyblafb_pci_driver); -} - -static void __exit cyblafb_exit(void) -{ - pci_unregister_driver(&cyblafb_pci_driver); -} - -module_init(cyblafb_init); -module_exit(cyblafb_exit); - -MODULE_AUTHOR("Knut Petersen <knut_petersen@t-online.de>"); -MODULE_DESCRIPTION("Framebuffer driver for Cyberblade/i1 graphics core"); -MODULE_LICENSE("GPL"); diff --git a/drivers/video/efifb.c b/drivers/video/efifb.c index daf9b81878a4..0c5b9a9fd56f 100644 --- a/drivers/video/efifb.c +++ b/drivers/video/efifb.c @@ -129,6 +129,8 @@ static int set_system(const struct dmi_system_id *id) screen_info.lfb_width = info->width; if (screen_info.lfb_height == 0) screen_info.lfb_height = info->height; + if (screen_info.orig_video_isVGA == 0) + screen_info.orig_video_isVGA = VIDEO_TYPE_EFI; return 0; } @@ -374,9 +376,10 @@ static int __init efifb_init(void) int ret; char *option = NULL; + dmi_check_system(dmi_system_table); + if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) return -ENODEV; - dmi_check_system(dmi_system_table); if (fb_get_options("efifb", &option)) return -ENODEV; diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c index 082026546aee..0a7a6679ee6e 100644 --- a/drivers/video/fb_defio.c +++ b/drivers/video/fb_defio.c @@ -85,8 +85,9 @@ EXPORT_SYMBOL_GPL(fb_deferred_io_fsync); /* vm_ops->page_mkwrite handler */ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma, - struct page *page) + struct vm_fault *vmf) { + struct page *page = vmf->page; struct fb_info *info = vma->vm_private_data; struct fb_deferred_io *fbdefio = info->fbdefio; struct page *cur; diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index cfd9dce1ce0b..2ac32e6b5953 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -46,6 +46,17 @@ struct fb_info *registered_fb[FB_MAX] __read_mostly; int num_registered_fb __read_mostly; +int lock_fb_info(struct fb_info *info) +{ + mutex_lock(&info->lock); + if (!info->fbops) { + mutex_unlock(&info->lock); + return 0; + } + return 1; +} +EXPORT_SYMBOL(lock_fb_info); + /* * Helpers */ @@ -1086,13 +1097,8 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, return -EINVAL; con2fb.framebuffer = -1; event.data = &con2fb; - - if (!lock_fb_info(info)) - return -ENODEV; event.info = info; fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, &event); - unlock_fb_info(info); - ret = copy_to_user(argp, &con2fb, sizeof(con2fb)) ? -EFAULT : 0; break; case FBIOPUT_CON2FBMAP: @@ -1109,12 +1115,8 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, break; } event.data = &con2fb; - if (!lock_fb_info(info)) - return -ENODEV; event.info = info; - ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP, - &event); - unlock_fb_info(info); + ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP, &event); break; case FBIOBLANK: if (!lock_fb_info(info)) diff --git a/drivers/video/nvidia/nv_type.h b/drivers/video/nvidia/nv_type.h index f132aab8c5de..c03f7f55c76d 100644 --- a/drivers/video/nvidia/nv_type.h +++ b/drivers/video/nvidia/nv_type.h @@ -5,7 +5,6 @@ #include <linux/types.h> #include <linux/i2c.h> #include <linux/i2c-algo-bit.h> -#include <linux/mutex.h> #include <video/vga.h> #define NV_ARCH_04 0x04 @@ -99,7 +98,6 @@ struct nvidia_par { RIVA_HW_STATE initial_state; RIVA_HW_STATE *CurrentState; struct vgastate vgastate; - struct mutex open_lock; u32 pseudo_palette[16]; struct pci_dev *pci_dev; u32 Architecture; diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c index 9dbb5a5a267b..efe10ff86d63 100644 --- a/drivers/video/nvidia/nvidia.c +++ b/drivers/video/nvidia/nvidia.c @@ -1004,15 +1004,12 @@ static int nvidiafb_open(struct fb_info *info, int user) { struct nvidia_par *par = info->par; - mutex_lock(&par->open_lock); - if (!par->open_count) { save_vga_x86(par); nvidia_save_vga(par, &par->initial_state); } par->open_count++; - mutex_unlock(&par->open_lock); return 0; } @@ -1021,8 +1018,6 @@ static int nvidiafb_release(struct fb_info *info, int user) struct nvidia_par *par = info->par; int err = 0; - mutex_lock(&par->open_lock); - if (!par->open_count) { err = -EINVAL; goto done; @@ -1035,7 +1030,6 @@ static int nvidiafb_release(struct fb_info *info, int user) par->open_count--; done: - mutex_unlock(&par->open_lock); return err; } @@ -1300,7 +1294,6 @@ static int __devinit nvidiafb_probe(struct pci_dev *pd, par = info->par; par->pci_dev = pd; - mutex_init(&par->open_lock); info->pixmap.addr = kzalloc(8 * 1024, GFP_KERNEL); if (info->pixmap.addr == NULL) diff --git a/drivers/video/omap/hwa742.c b/drivers/video/omap/hwa742.c index f24df0b54e1c..8aa6e47202b9 100644 --- a/drivers/video/omap/hwa742.c +++ b/drivers/video/omap/hwa742.c @@ -742,7 +742,7 @@ static int calc_extif_timings(unsigned long sysclk, int *extif_mem_div) if (calc_reg_timing(sysclk, div) == 0) break; } - if (div > max_clk_div) + if (div >= max_clk_div) goto err; *extif_mem_div = div; @@ -752,7 +752,7 @@ static int calc_extif_timings(unsigned long sysclk, int *extif_mem_div) break; } - if (div > max_clk_div) + if (div >= max_clk_div) goto err; return 0; diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c index 1a49519dafa4..060d72fe57cb 100644 --- a/drivers/video/omap/omapfb_main.c +++ b/drivers/video/omap/omapfb_main.c @@ -338,7 +338,7 @@ static int omapfb_blank(int blank, struct fb_info *fbi) omapfb_rqueue_lock(fbdev); switch (blank) { - case VESA_NO_BLANKING: + case FB_BLANK_UNBLANK: if (fbdev->state == OMAPFB_SUSPENDED) { if (fbdev->ctrl->resume) fbdev->ctrl->resume(); @@ -349,7 +349,7 @@ static int omapfb_blank(int blank, struct fb_info *fbi) do_update = 1; } break; - case VESA_POWERDOWN: + case FB_BLANK_POWERDOWN: if (fbdev->state == OMAPFB_ACTIVE) { fbdev->panel->disable(fbdev->panel); if (fbdev->ctrl->suspend) @@ -1818,7 +1818,7 @@ static int omapfb_suspend(struct platform_device *pdev, pm_message_t mesg) { struct omapfb_device *fbdev = platform_get_drvdata(pdev); - omapfb_blank(VESA_POWERDOWN, fbdev->fb_info[0]); + omapfb_blank(FB_BLANK_POWERDOWN, fbdev->fb_info[0]); return 0; } @@ -1828,7 +1828,7 @@ static int omapfb_resume(struct platform_device *pdev) { struct omapfb_device *fbdev = platform_get_drvdata(pdev); - omapfb_blank(VESA_NO_BLANKING, fbdev->fb_info[0]); + omapfb_blank(FB_BLANK_UNBLANK, fbdev->fb_info[0]); return 0; } diff --git a/drivers/video/s1d13xxxfb.c b/drivers/video/s1d13xxxfb.c index a7b01d2724b5..0726aecf3b7e 100644 --- a/drivers/video/s1d13xxxfb.c +++ b/drivers/video/s1d13xxxfb.c @@ -50,9 +50,22 @@ #define dbg(fmt, args...) do { } while (0) #endif -static const int __devinitconst s1d13xxxfb_revisions[] = { - S1D13506_CHIP_REV, /* Rev.4 on HP Jornada 7xx S1D13506 */ - S1D13806_CHIP_REV, /* Rev.7 on .. */ +/* + * List of card production ids + */ +static const int s1d13xxxfb_prod_ids[] = { + S1D13505_PROD_ID, + S1D13506_PROD_ID, + S1D13806_PROD_ID, +}; + +/* + * List of card strings + */ +static const char *s1d13xxxfb_prod_names[] = { + "S1D13505", + "S1D13506", + "S1D13806", }; /* @@ -377,7 +390,6 @@ s1d13xxxfb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info) return 0; } - /* framebuffer information structures */ static struct fb_ops s1d13xxxfb_fbops = { @@ -544,7 +556,7 @@ s1d13xxxfb_probe(struct platform_device *pdev) struct s1d13xxxfb_pdata *pdata = NULL; int ret = 0; int i; - u8 revision; + u8 revision, prod_id; dbg("probe called: device is %p\n", pdev); @@ -613,19 +625,31 @@ s1d13xxxfb_probe(struct platform_device *pdev) goto bail; } - revision = s1d13xxxfb_readreg(default_par, S1DREG_REV_CODE) >> 2; - + /* production id is top 6 bits */ + prod_id = s1d13xxxfb_readreg(default_par, S1DREG_REV_CODE) >> 2; + /* revision id is lower 2 bits */ + revision = s1d13xxxfb_readreg(default_par, S1DREG_REV_CODE) & 0x3; ret = -ENODEV; - for (i = 0; i < ARRAY_SIZE(s1d13xxxfb_revisions); i++) { - if (revision == s1d13xxxfb_revisions[i]) + for (i = 0; i < ARRAY_SIZE(s1d13xxxfb_prod_ids); i++) { + if (prod_id == s1d13xxxfb_prod_ids[i]) { + /* looks like we got it in our list */ + default_par->prod_id = prod_id; + default_par->revision = revision; ret = 0; + break; + } } - if (!ret) + if (!ret) { + printk(KERN_INFO PFX "chip production id %i = %s\n", + prod_id, s1d13xxxfb_prod_names[i]); printk(KERN_INFO PFX "chip revision %i\n", revision); - else { - printk(KERN_INFO PFX "unknown chip revision %i\n", revision); + } else { + printk(KERN_INFO PFX + "unknown chip production id %i, revision %i\n", + prod_id, revision); + printk(KERN_INFO PFX "please contant maintainer\n"); goto bail; } diff --git a/drivers/video/s3c-fb.c b/drivers/video/s3c-fb.c new file mode 100644 index 000000000000..5e9c6302433b --- /dev/null +++ b/drivers/video/s3c-fb.c @@ -0,0 +1,1036 @@ +/* linux/drivers/video/s3c-fb.c + * + * Copyright 2008 Openmoko Inc. + * Copyright 2008 Simtec Electronics + * Ben Dooks <ben@simtec.co.uk> + * http://armlinux.simtec.co.uk/ + * + * Samsung SoC Framebuffer driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/dma-mapping.h> +#include <linux/init.h> +#include <linux/gfp.h> +#include <linux/clk.h> +#include <linux/fb.h> +#include <linux/io.h> + +#include <mach/map.h> +#include <mach/regs-fb.h> +#include <plat/fb.h> + +/* This driver will export a number of framebuffer interfaces depending + * on the configuration passed in via the platform data. Each fb instance + * maps to a hardware window. Currently there is no support for runtime + * setting of the alpha-blending functions that each window has, so only + * window 0 is actually useful. + * + * Window 0 is treated specially, it is used for the basis of the LCD + * output timings and as the control for the output power-down state. +*/ + +/* note, some of the functions that get called are derived from including + * <mach/regs-fb.h> as they are specific to the architecture that the code + * is being built for. +*/ + +#ifdef CONFIG_FB_S3C_DEBUG_REGWRITE +#undef writel +#define writel(v, r) do { \ + printk(KERN_DEBUG "%s: %08x => %p\n", __func__, (unsigned int)v, r); \ + __raw_writel(v, r); } while(0) +#endif /* FB_S3C_DEBUG_REGWRITE */ + +struct s3c_fb; + +/** + * struct s3c_fb_win - per window private data for each framebuffer. + * @windata: The platform data supplied for the window configuration. + * @parent: The hardware that this window is part of. + * @fbinfo: Pointer pack to the framebuffer info for this window. + * @palette_buffer: Buffer/cache to hold palette entries. + * @pseudo_palette: For use in TRUECOLOUR modes for entries 0..15/ + * @index: The window number of this window. + * @palette: The bitfields for changing r/g/b into a hardware palette entry. + */ +struct s3c_fb_win { + struct s3c_fb_pd_win *windata; + struct s3c_fb *parent; + struct fb_info *fbinfo; + struct s3c_fb_palette palette; + + u32 *palette_buffer; + u32 pseudo_palette[16]; + unsigned int index; +}; + +/** + * struct s3c_fb - overall hardware state of the hardware + * @dev: The device that we bound to, for printing, etc. + * @regs_res: The resource we claimed for the IO registers. + * @bus_clk: The clk (hclk) feeding our interface and possibly pixclk. + * @regs: The mapped hardware registers. + * @enabled: A bitmask of enabled hardware windows. + * @pdata: The platform configuration data passed with the device. + * @windows: The hardware windows that have been claimed. + */ +struct s3c_fb { + struct device *dev; + struct resource *regs_res; + struct clk *bus_clk; + void __iomem *regs; + + unsigned char enabled; + + struct s3c_fb_platdata *pdata; + struct s3c_fb_win *windows[S3C_FB_MAX_WIN]; +}; + +/** + * s3c_fb_win_has_palette() - determine if a mode has a palette + * @win: The window number being queried. + * @bpp: The number of bits per pixel to test. + * + * Work out if the given window supports palletised data at the specified bpp. + */ +static int s3c_fb_win_has_palette(unsigned int win, unsigned int bpp) +{ + return s3c_fb_win_pal_size(win) <= (1 << bpp); +} + +/** + * s3c_fb_check_var() - framebuffer layer request to verify a given mode. + * @var: The screen information to verify. + * @info: The framebuffer device. + * + * Framebuffer layer call to verify the given information and allow us to + * update various information depending on the hardware capabilities. + */ +static int s3c_fb_check_var(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + struct s3c_fb_win *win = info->par; + struct s3c_fb_pd_win *windata = win->windata; + struct s3c_fb *sfb = win->parent; + + dev_dbg(sfb->dev, "checking parameters\n"); + + var->xres_virtual = max((unsigned int)windata->virtual_x, var->xres); + var->yres_virtual = max((unsigned int)windata->virtual_y, var->yres); + + if (!s3c_fb_validate_win_bpp(win->index, var->bits_per_pixel)) { + dev_dbg(sfb->dev, "win %d: unsupported bpp %d\n", + win->index, var->bits_per_pixel); + return -EINVAL; + } + + /* always ensure these are zero, for drop through cases below */ + var->transp.offset = 0; + var->transp.length = 0; + + switch (var->bits_per_pixel) { + case 1: + case 2: + case 4: + case 8: + if (!s3c_fb_win_has_palette(win->index, var->bits_per_pixel)) { + /* non palletised, A:1,R:2,G:3,B:2 mode */ + var->red.offset = 4; + var->green.offset = 2; + var->blue.offset = 0; + var->red.length = 5; + var->green.length = 3; + var->blue.length = 2; + var->transp.offset = 7; + var->transp.length = 1; + } else { + var->red.offset = 0; + var->red.length = var->bits_per_pixel; + var->green = var->red; + var->blue = var->red; + } + break; + + case 19: + /* 666 with one bit alpha/transparency */ + var->transp.offset = 18; + var->transp.length = 1; + case 18: + var->bits_per_pixel = 32; + + /* 666 format */ + var->red.offset = 12; + var->green.offset = 6; + var->blue.offset = 0; + var->red.length = 6; + var->green.length = 6; + var->blue.length = 6; + break; + + case 16: + /* 16 bpp, 565 format */ + var->red.offset = 11; + var->green.offset = 5; + var->blue.offset = 0; + var->red.length = 5; + var->green.length = 6; + var->blue.length = 5; + break; + + case 28: + case 25: + var->transp.length = var->bits_per_pixel - 24; + var->transp.offset = 24; + /* drop through */ + case 24: + /* our 24bpp is unpacked, so 32bpp */ + var->bits_per_pixel = 32; + case 32: + var->red.offset = 16; + var->red.length = 8; + var->green.offset = 8; + var->green.length = 8; + var->blue.offset = 0; + var->blue.length = 8; + break; + + default: + dev_err(sfb->dev, "invalid bpp\n"); + } + + dev_dbg(sfb->dev, "%s: verified parameters\n", __func__); + return 0; +} + +/** + * s3c_fb_calc_pixclk() - calculate the divider to create the pixel clock. + * @sfb: The hardware state. + * @pixclock: The pixel clock wanted, in picoseconds. + * + * Given the specified pixel clock, work out the necessary divider to get + * close to the output frequency. + */ +static int s3c_fb_calc_pixclk(struct s3c_fb *sfb, unsigned int pixclk) +{ + unsigned long clk = clk_get_rate(sfb->bus_clk); + unsigned long long tmp; + unsigned int result; + + tmp = (unsigned long long)clk; + tmp *= pixclk; + + do_div(tmp, 1000000000UL); + result = (unsigned int)tmp / 1000; + + dev_dbg(sfb->dev, "pixclk=%u, clk=%lu, div=%d (%lu)\n", + pixclk, clk, result, clk / result); + + return result; +} + +/** + * s3c_fb_align_word() - align pixel count to word boundary + * @bpp: The number of bits per pixel + * @pix: The value to be aligned. + * + * Align the given pixel count so that it will start on an 32bit word + * boundary. + */ +static int s3c_fb_align_word(unsigned int bpp, unsigned int pix) +{ + int pix_per_word; + + if (bpp > 16) + return pix; + + pix_per_word = (8 * 32) / bpp; + return ALIGN(pix, pix_per_word); +} + +/** + * s3c_fb_set_par() - framebuffer request to set new framebuffer state. + * @info: The framebuffer to change. + * + * Framebuffer layer request to set a new mode for the specified framebuffer + */ +static int s3c_fb_set_par(struct fb_info *info) +{ + struct fb_var_screeninfo *var = &info->var; + struct s3c_fb_win *win = info->par; + struct s3c_fb *sfb = win->parent; + void __iomem *regs = sfb->regs; + int win_no = win->index; + u32 data; + u32 pagewidth; + int clkdiv; + + dev_dbg(sfb->dev, "setting framebuffer parameters\n"); + + switch (var->bits_per_pixel) { + case 32: + case 24: + case 16: + case 12: + info->fix.visual = FB_VISUAL_TRUECOLOR; + break; + case 8: + if (s3c_fb_win_has_palette(win_no, 8)) + info->fix.visual = FB_VISUAL_PSEUDOCOLOR; + else + info->fix.visual = FB_VISUAL_TRUECOLOR; + break; + case 1: + info->fix.visual = FB_VISUAL_MONO01; + break; + default: + info->fix.visual = FB_VISUAL_PSEUDOCOLOR; + break; + } + + info->fix.line_length = (var->xres_virtual * var->bits_per_pixel) / 8; + + /* disable the window whilst we update it */ + writel(0, regs + WINCON(win_no)); + + /* use window 0 as the basis for the lcd output timings */ + + if (win_no == 0) { + clkdiv = s3c_fb_calc_pixclk(sfb, var->pixclock); + + data = sfb->pdata->vidcon0; + data &= ~(VIDCON0_CLKVAL_F_MASK | VIDCON0_CLKDIR); + + if (clkdiv > 1) + data |= VIDCON0_CLKVAL_F(clkdiv-1) | VIDCON0_CLKDIR; + else + data &= ~VIDCON0_CLKDIR; /* 1:1 clock */ + + /* write the timing data to the panel */ + + data |= VIDCON0_ENVID | VIDCON0_ENVID_F; + writel(data, regs + VIDCON0); + + data = VIDTCON0_VBPD(var->upper_margin - 1) | + VIDTCON0_VFPD(var->lower_margin - 1) | + VIDTCON0_VSPW(var->vsync_len - 1); + + writel(data, regs + VIDTCON0); + + data = VIDTCON1_HBPD(var->left_margin - 1) | + VIDTCON1_HFPD(var->right_margin - 1) | + VIDTCON1_HSPW(var->hsync_len - 1); + + writel(data, regs + VIDTCON1); + + data = VIDTCON2_LINEVAL(var->yres - 1) | + VIDTCON2_HOZVAL(var->xres - 1); + writel(data, regs + VIDTCON2); + } + + /* write the buffer address */ + + writel(info->fix.smem_start, regs + VIDW_BUF_START(win_no)); + + data = info->fix.smem_start + info->fix.line_length * var->yres; + writel(data, regs + VIDW_BUF_END(win_no)); + + pagewidth = (var->xres * var->bits_per_pixel) >> 3; + data = VIDW_BUF_SIZE_OFFSET(info->fix.line_length - pagewidth) | + VIDW_BUF_SIZE_PAGEWIDTH(pagewidth); + writel(data, regs + VIDW_BUF_SIZE(win_no)); + + /* write 'OSD' registers to control position of framebuffer */ + + data = VIDOSDxA_TOPLEFT_X(0) | VIDOSDxA_TOPLEFT_Y(0); + writel(data, regs + VIDOSD_A(win_no)); + + data = VIDOSDxB_BOTRIGHT_X(s3c_fb_align_word(var->bits_per_pixel, + var->xres - 1)) | + VIDOSDxB_BOTRIGHT_Y(var->yres - 1); + + writel(data, regs + VIDOSD_B(win_no)); + + data = var->xres * var->yres; + if (s3c_fb_has_osd_d(win_no)) { + writel(data, regs + VIDOSD_D(win_no)); + writel(0, regs + VIDOSD_C(win_no)); + } else + writel(data, regs + VIDOSD_C(win_no)); + + data = WINCONx_ENWIN; + + /* note, since we have to round up the bits-per-pixel, we end up + * relying on the bitfield information for r/g/b/a to work out + * exactly which mode of operation is intended. */ + + switch (var->bits_per_pixel) { + case 1: + data |= WINCON0_BPPMODE_1BPP; + data |= WINCONx_BITSWP; + data |= WINCONx_BURSTLEN_4WORD; + break; + case 2: + data |= WINCON0_BPPMODE_2BPP; + data |= WINCONx_BITSWP; + data |= WINCONx_BURSTLEN_8WORD; + break; + case 4: + data |= WINCON0_BPPMODE_4BPP; + data |= WINCONx_BITSWP; + data |= WINCONx_BURSTLEN_8WORD; + break; + case 8: + if (var->transp.length != 0) + data |= WINCON1_BPPMODE_8BPP_1232; + else + data |= WINCON0_BPPMODE_8BPP_PALETTE; + data |= WINCONx_BURSTLEN_8WORD; + data |= WINCONx_BYTSWP; + break; + case 16: + if (var->transp.length != 0) + data |= WINCON1_BPPMODE_16BPP_A1555; + else + data |= WINCON0_BPPMODE_16BPP_565; + data |= WINCONx_HAWSWP; + data |= WINCONx_BURSTLEN_16WORD; + break; + case 24: + case 32: + if (var->red.length == 6) { + if (var->transp.length != 0) + data |= WINCON1_BPPMODE_19BPP_A1666; + else + data |= WINCON1_BPPMODE_18BPP_666; + } else if (var->transp.length != 0) + data |= WINCON1_BPPMODE_25BPP_A1888; + else + data |= WINCON0_BPPMODE_24BPP_888; + + data |= WINCONx_BURSTLEN_16WORD; + break; + } + + writel(data, regs + WINCON(win_no)); + writel(0x0, regs + WINxMAP(win_no)); + + return 0; +} + +/** + * s3c_fb_update_palette() - set or schedule a palette update. + * @sfb: The hardware information. + * @win: The window being updated. + * @reg: The palette index being changed. + * @value: The computed palette value. + * + * Change the value of a palette register, either by directly writing to + * the palette (this requires the palette RAM to be disconnected from the + * hardware whilst this is in progress) or schedule the update for later. + * + * At the moment, since we have no VSYNC interrupt support, we simply set + * the palette entry directly. + */ +static void s3c_fb_update_palette(struct s3c_fb *sfb, + struct s3c_fb_win *win, + unsigned int reg, + u32 value) +{ + void __iomem *palreg; + u32 palcon; + + palreg = sfb->regs + s3c_fb_pal_reg(win->index, reg); + + dev_dbg(sfb->dev, "%s: win %d, reg %d (%p): %08x\n", + __func__, win->index, reg, palreg, value); + + win->palette_buffer[reg] = value; + + palcon = readl(sfb->regs + WPALCON); + writel(palcon | WPALCON_PAL_UPDATE, sfb->regs + WPALCON); + + if (s3c_fb_pal_is16(win->index)) + writew(value, palreg); + else + writel(value, palreg); + + writel(palcon, sfb->regs + WPALCON); +} + +static inline unsigned int chan_to_field(unsigned int chan, + struct fb_bitfield *bf) +{ + chan &= 0xffff; + chan >>= 16 - bf->length; + return chan << bf->offset; +} + +/** + * s3c_fb_setcolreg() - framebuffer layer request to change palette. + * @regno: The palette index to change. + * @red: The red field for the palette data. + * @green: The green field for the palette data. + * @blue: The blue field for the palette data. + * @trans: The transparency (alpha) field for the palette data. + * @info: The framebuffer being changed. + */ +static int s3c_fb_setcolreg(unsigned regno, + unsigned red, unsigned green, unsigned blue, + unsigned transp, struct fb_info *info) +{ + struct s3c_fb_win *win = info->par; + struct s3c_fb *sfb = win->parent; + unsigned int val; + + dev_dbg(sfb->dev, "%s: win %d: %d => rgb=%d/%d/%d\n", + __func__, win->index, regno, red, green, blue); + + switch (info->fix.visual) { + case FB_VISUAL_TRUECOLOR: + /* true-colour, use pseudo-palette */ + + if (regno < 16) { + u32 *pal = info->pseudo_palette; + + val = chan_to_field(red, &info->var.red); + val |= chan_to_field(green, &info->var.green); + val |= chan_to_field(blue, &info->var.blue); + + pal[regno] = val; + } + break; + + case FB_VISUAL_PSEUDOCOLOR: + if (regno < s3c_fb_win_pal_size(win->index)) { + val = chan_to_field(red, &win->palette.r); + val |= chan_to_field(green, &win->palette.g); + val |= chan_to_field(blue, &win->palette.b); + + s3c_fb_update_palette(sfb, win, regno, val); + } + + break; + + default: + return 1; /* unknown type */ + } + + return 0; +} + +/** + * s3c_fb_enable() - Set the state of the main LCD output + * @sfb: The main framebuffer state. + * @enable: The state to set. + */ +static void s3c_fb_enable(struct s3c_fb *sfb, int enable) +{ + u32 vidcon0 = readl(sfb->regs + VIDCON0); + + if (enable) + vidcon0 |= VIDCON0_ENVID | VIDCON0_ENVID_F; + else { + /* see the note in the framebuffer datasheet about + * why you cannot take both of these bits down at the + * same time. */ + + if (!(vidcon0 & VIDCON0_ENVID)) + return; + + vidcon0 |= VIDCON0_ENVID; + vidcon0 &= ~VIDCON0_ENVID_F; + } + + writel(vidcon0, sfb->regs + VIDCON0); +} + +/** + * s3c_fb_blank() - blank or unblank the given window + * @blank_mode: The blank state from FB_BLANK_* + * @info: The framebuffer to blank. + * + * Framebuffer layer request to change the power state. + */ +static int s3c_fb_blank(int blank_mode, struct fb_info *info) +{ + struct s3c_fb_win *win = info->par; + struct s3c_fb *sfb = win->parent; + unsigned int index = win->index; + u32 wincon; + + dev_dbg(sfb->dev, "blank mode %d\n", blank_mode); + + wincon = readl(sfb->regs + WINCON(index)); + + switch (blank_mode) { + case FB_BLANK_POWERDOWN: + wincon &= ~WINCONx_ENWIN; + sfb->enabled &= ~(1 << index); + /* fall through to FB_BLANK_NORMAL */ + + case FB_BLANK_NORMAL: + /* disable the DMA and display 0x0 (black) */ + writel(WINxMAP_MAP | WINxMAP_MAP_COLOUR(0x0), + sfb->regs + WINxMAP(index)); + break; + + case FB_BLANK_UNBLANK: + writel(0x0, sfb->regs + WINxMAP(index)); + wincon |= WINCONx_ENWIN; + sfb->enabled |= (1 << index); + break; + + case FB_BLANK_VSYNC_SUSPEND: + case FB_BLANK_HSYNC_SUSPEND: + default: + return 1; + } + + writel(wincon, sfb->regs + WINCON(index)); + + /* Check the enabled state to see if we need to be running the + * main LCD interface, as if there are no active windows then + * it is highly likely that we also do not need to output + * anything. + */ + + /* We could do something like the following code, but the current + * system of using framebuffer events means that we cannot make + * the distinction between just window 0 being inactive and all + * the windows being down. + * + * s3c_fb_enable(sfb, sfb->enabled ? 1 : 0); + */ + + /* we're stuck with this until we can do something about overriding + * the power control using the blanking event for a single fb. + */ + if (index == 0) + s3c_fb_enable(sfb, blank_mode != FB_BLANK_POWERDOWN ? 1 : 0); + + return 0; +} + +static struct fb_ops s3c_fb_ops = { + .owner = THIS_MODULE, + .fb_check_var = s3c_fb_check_var, + .fb_set_par = s3c_fb_set_par, + .fb_blank = s3c_fb_blank, + .fb_setcolreg = s3c_fb_setcolreg, + .fb_fillrect = cfb_fillrect, + .fb_copyarea = cfb_copyarea, + .fb_imageblit = cfb_imageblit, +}; + +/** + * s3c_fb_alloc_memory() - allocate display memory for framebuffer window + * @sfb: The base resources for the hardware. + * @win: The window to initialise memory for. + * + * Allocate memory for the given framebuffer. + */ +static int __devinit s3c_fb_alloc_memory(struct s3c_fb *sfb, + struct s3c_fb_win *win) +{ + struct s3c_fb_pd_win *windata = win->windata; + unsigned int real_size, virt_size, size; + struct fb_info *fbi = win->fbinfo; + dma_addr_t map_dma; + + dev_dbg(sfb->dev, "allocating memory for display\n"); + + real_size = windata->win_mode.xres * windata->win_mode.yres; + virt_size = windata->virtual_x * windata->virtual_y; + + dev_dbg(sfb->dev, "real_size=%u (%u.%u), virt_size=%u (%u.%u)\n", + real_size, windata->win_mode.xres, windata->win_mode.yres, + virt_size, windata->virtual_x, windata->virtual_y); + + size = (real_size > virt_size) ? real_size : virt_size; + size *= (windata->max_bpp > 16) ? 32 : windata->max_bpp; + size /= 8; + + fbi->fix.smem_len = size; + size = PAGE_ALIGN(size); + + dev_dbg(sfb->dev, "want %u bytes for window\n", size); + + fbi->screen_base = dma_alloc_writecombine(sfb->dev, size, + &map_dma, GFP_KERNEL); + if (!fbi->screen_base) + return -ENOMEM; + + dev_dbg(sfb->dev, "mapped %x to %p\n", + (unsigned int)map_dma, fbi->screen_base); + + memset(fbi->screen_base, 0x0, size); + fbi->fix.smem_start = map_dma; + + return 0; +} + +/** + * s3c_fb_free_memory() - free the display memory for the given window + * @sfb: The base resources for the hardware. + * @win: The window to free the display memory for. + * + * Free the display memory allocated by s3c_fb_alloc_memory(). + */ +static void s3c_fb_free_memory(struct s3c_fb *sfb, struct s3c_fb_win *win) +{ + struct fb_info *fbi = win->fbinfo; + + dma_free_writecombine(sfb->dev, PAGE_ALIGN(fbi->fix.smem_len), + fbi->screen_base, fbi->fix.smem_start); +} + +/** + * s3c_fb_release_win() - release resources for a framebuffer window. + * @win: The window to cleanup the resources for. + * + * Release the resources that where claimed for the hardware window, + * such as the framebuffer instance and any memory claimed for it. + */ +static void s3c_fb_release_win(struct s3c_fb *sfb, struct s3c_fb_win *win) +{ + fb_dealloc_cmap(&win->fbinfo->cmap); + unregister_framebuffer(win->fbinfo); + s3c_fb_free_memory(sfb, win); +} + +/** + * s3c_fb_probe_win() - register an hardware window + * @sfb: The base resources for the hardware + * @res: Pointer to where to place the resultant window. + * + * Allocate and do the basic initialisation for one of the hardware's graphics + * windows. + */ +static int __devinit s3c_fb_probe_win(struct s3c_fb *sfb, unsigned int win_no, + struct s3c_fb_win **res) +{ + struct fb_var_screeninfo *var; + struct fb_videomode *initmode; + struct s3c_fb_pd_win *windata; + struct s3c_fb_win *win; + struct fb_info *fbinfo; + int palette_size; + int ret; + + dev_dbg(sfb->dev, "probing window %d\n", win_no); + + palette_size = s3c_fb_win_pal_size(win_no); + + fbinfo = framebuffer_alloc(sizeof(struct s3c_fb_win) + + palette_size * sizeof(u32), sfb->dev); + if (!fbinfo) { + dev_err(sfb->dev, "failed to allocate framebuffer\n"); + return -ENOENT; + } + + windata = sfb->pdata->win[win_no]; + initmode = &windata->win_mode; + + WARN_ON(windata->max_bpp == 0); + WARN_ON(windata->win_mode.xres == 0); + WARN_ON(windata->win_mode.yres == 0); + + win = fbinfo->par; + var = &fbinfo->var; + win->fbinfo = fbinfo; + win->parent = sfb; + win->windata = windata; + win->index = win_no; + win->palette_buffer = (u32 *)(win + 1); + + ret = s3c_fb_alloc_memory(sfb, win); + if (ret) { + dev_err(sfb->dev, "failed to allocate display memory\n"); + goto err_framebuffer; + } + + /* setup the r/b/g positions for the window's palette */ + s3c_fb_init_palette(win_no, &win->palette); + + /* setup the initial video mode from the window */ + fb_videomode_to_var(&fbinfo->var, initmode); + + fbinfo->fix.type = FB_TYPE_PACKED_PIXELS; + fbinfo->fix.accel = FB_ACCEL_NONE; + fbinfo->var.activate = FB_ACTIVATE_NOW; + fbinfo->var.vmode = FB_VMODE_NONINTERLACED; + fbinfo->var.bits_per_pixel = windata->default_bpp; + fbinfo->fbops = &s3c_fb_ops; + fbinfo->flags = FBINFO_FLAG_DEFAULT; + fbinfo->pseudo_palette = &win->pseudo_palette; + + /* prepare to actually start the framebuffer */ + + ret = s3c_fb_check_var(&fbinfo->var, fbinfo); + if (ret < 0) { + dev_err(sfb->dev, "check_var failed on initial video params\n"); + goto err_alloc_mem; + } + + /* create initial colour map */ + + ret = fb_alloc_cmap(&fbinfo->cmap, s3c_fb_win_pal_size(win_no), 1); + if (ret == 0) + fb_set_cmap(&fbinfo->cmap, fbinfo); + else + dev_err(sfb->dev, "failed to allocate fb cmap\n"); + + s3c_fb_set_par(fbinfo); + + dev_dbg(sfb->dev, "about to register framebuffer\n"); + + /* run the check_var and set_par on our configuration. */ + + ret = register_framebuffer(fbinfo); + if (ret < 0) { + dev_err(sfb->dev, "failed to register framebuffer\n"); + goto err_alloc_mem; + } + + *res = win; + dev_info(sfb->dev, "window %d: fb %s\n", win_no, fbinfo->fix.id); + + return 0; + +err_alloc_mem: + s3c_fb_free_memory(sfb, win); + +err_framebuffer: + unregister_framebuffer(fbinfo); + return ret; +} + +/** + * s3c_fb_clear_win() - clear hardware window registers. + * @sfb: The base resources for the hardware. + * @win: The window to process. + * + * Reset the specific window registers to a known state. + */ +static void s3c_fb_clear_win(struct s3c_fb *sfb, int win) +{ + void __iomem *regs = sfb->regs; + + writel(0, regs + WINCON(win)); + writel(0xffffff, regs + WxKEYCONy(win, 0)); + writel(0xffffff, regs + WxKEYCONy(win, 1)); + + writel(0, regs + VIDOSD_A(win)); + writel(0, regs + VIDOSD_B(win)); + writel(0, regs + VIDOSD_C(win)); +} + +static int __devinit s3c_fb_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct s3c_fb_platdata *pd; + struct s3c_fb *sfb; + struct resource *res; + int win; + int ret = 0; + + pd = pdev->dev.platform_data; + if (!pd) { + dev_err(dev, "no platform data specified\n"); + return -EINVAL; + } + + sfb = kzalloc(sizeof(struct s3c_fb), GFP_KERNEL); + if (!sfb) { + dev_err(dev, "no memory for framebuffers\n"); + return -ENOMEM; + } + + sfb->dev = dev; + sfb->pdata = pd; + + sfb->bus_clk = clk_get(dev, "lcd"); + if (IS_ERR(sfb->bus_clk)) { + dev_err(dev, "failed to get bus clock\n"); + goto err_sfb; + } + + clk_enable(sfb->bus_clk); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "failed to find registers\n"); + ret = -ENOENT; + goto err_clk; + } + + sfb->regs_res = request_mem_region(res->start, resource_size(res), + dev_name(dev)); + if (!sfb->regs_res) { + dev_err(dev, "failed to claim register region\n"); + ret = -ENOENT; + goto err_clk; + } + + sfb->regs = ioremap(res->start, resource_size(res)); + if (!sfb->regs) { + dev_err(dev, "failed to map registers\n"); + ret = -ENXIO; + goto err_req_region; + } + + dev_dbg(dev, "got resources (regs %p), probing windows\n", sfb->regs); + + /* setup gpio and output polarity controls */ + + pd->setup_gpio(); + + writel(pd->vidcon1, sfb->regs + VIDCON1); + + /* zero all windows before we do anything */ + + for (win = 0; win < S3C_FB_MAX_WIN; win++) + s3c_fb_clear_win(sfb, win); + + /* we have the register setup, start allocating framebuffers */ + + for (win = 0; win < S3C_FB_MAX_WIN; win++) { + if (!pd->win[win]) + continue; + + ret = s3c_fb_probe_win(sfb, win, &sfb->windows[win]); + if (ret < 0) { + dev_err(dev, "failed to create window %d\n", win); + for (; win >= 0; win--) + s3c_fb_release_win(sfb, sfb->windows[win]); + goto err_ioremap; + } + } + + platform_set_drvdata(pdev, sfb); + + return 0; + +err_ioremap: + iounmap(sfb->regs); + +err_req_region: + release_resource(sfb->regs_res); + kfree(sfb->regs_res); + +err_clk: + clk_disable(sfb->bus_clk); + clk_put(sfb->bus_clk); + +err_sfb: + kfree(sfb); + return ret; +} + +/** + * s3c_fb_remove() - Cleanup on module finalisation + * @pdev: The platform device we are bound to. + * + * Shutdown and then release all the resources that the driver allocated + * on initialisation. + */ +static int __devexit s3c_fb_remove(struct platform_device *pdev) +{ + struct s3c_fb *sfb = platform_get_drvdata(pdev); + int win; + + for (win = 0; win <= S3C_FB_MAX_WIN; win++) + s3c_fb_release_win(sfb, sfb->windows[win]); + + iounmap(sfb->regs); + + clk_disable(sfb->bus_clk); + clk_put(sfb->bus_clk); + + release_resource(sfb->regs_res); + kfree(sfb->regs_res); + + kfree(sfb); + + return 0; +} + +#ifdef CONFIG_PM +static int s3c_fb_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct s3c_fb *sfb = platform_get_drvdata(pdev); + struct s3c_fb_win *win; + int win_no; + + for (win_no = S3C_FB_MAX_WIN; win_no >= 0; win_no--) { + win = sfb->windows[win_no]; + if (!win) + continue; + + /* use the blank function to push into power-down */ + s3c_fb_blank(FB_BLANK_POWERDOWN, win->fbinfo); + } + + clk_disable(sfb->bus_clk); + return 0; +} + +static int s3c_fb_resume(struct platform_device *pdev) +{ + struct s3c_fb *sfb = platform_get_drvdata(pdev); + struct s3c_fb_win *win; + int win_no; + + clk_enable(sfb->bus_clk); + + for (win_no = 0; win_no < S3C_FB_MAX_WIN; win_no++) { + win = sfb->windows[win_no]; + if (!win) + continue; + + dev_dbg(&pdev->dev, "resuming window %d\n", win_no); + s3c_fb_set_par(win->fbinfo); + } + + return 0; +} +#else +#define s3c_fb_suspend NULL +#define s3c_fb_resume NULL +#endif + +static struct platform_driver s3c_fb_driver = { + .probe = s3c_fb_probe, + .remove = s3c_fb_remove, + .suspend = s3c_fb_suspend, + .resume = s3c_fb_resume, + .driver = { + .name = "s3c-fb", + .owner = THIS_MODULE, + }, +}; + +static int __init s3c_fb_init(void) +{ + return platform_driver_register(&s3c_fb_driver); +} + +static void __exit s3c_fb_cleanup(void) +{ + platform_driver_unregister(&s3c_fb_driver); +} + +module_init(s3c_fb_init); +module_exit(s3c_fb_cleanup); + +MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>"); +MODULE_DESCRIPTION("Samsung S3C SoC Framebuffer driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:s3c-fb"); diff --git a/drivers/video/sgivwfb.c b/drivers/video/sgivwfb.c index f5252c2552fd..bba53714a7b1 100644 --- a/drivers/video/sgivwfb.c +++ b/drivers/video/sgivwfb.c @@ -837,6 +837,8 @@ static int sgivwfb_remove(struct platform_device *dev) iounmap(par->regs); iounmap(info->screen_base); release_mem_region(DBE_REG_PHYS, DBE_REG_SIZE); + fb_dealloc_cmap(&info->cmap); + framebuffer_release(info); } return 0; } diff --git a/drivers/video/skeletonfb.c b/drivers/video/skeletonfb.c index df5336561d13..a439159204a8 100644 --- a/drivers/video/skeletonfb.c +++ b/drivers/video/skeletonfb.c @@ -795,8 +795,9 @@ static int __devinit xxxfb_probe(struct pci_dev *dev, if (!retval || retval == 4) return -EINVAL; - /* This has to been done !!! */ - fb_alloc_cmap(&info->cmap, cmap_len, 0); + /* This has to be done! */ + if (fb_alloc_cmap(&info->cmap, cmap_len, 0)) + return -ENOMEM; /* * The following is done in the case of having hardware with a static @@ -820,8 +821,10 @@ static int __devinit xxxfb_probe(struct pci_dev *dev, */ /* xxxfb_set_par(info); */ - if (register_framebuffer(info) < 0) + if (register_framebuffer(info) < 0) { + fb_dealloc_cmap(&info->cmap); return -EINVAL; + } printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node, info->fix.id); pci_set_drvdata(dev, info); /* or platform_set_drvdata(pdev, info) */ diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c index dcd98793d568..eb5d73a06702 100644 --- a/drivers/video/sm501fb.c +++ b/drivers/video/sm501fb.c @@ -1525,7 +1525,10 @@ static int sm501fb_init_fb(struct fb_info *fb, } /* initialise and set the palette */ - fb_alloc_cmap(&fb->cmap, NR_PALETTE, 0); + if (fb_alloc_cmap(&fb->cmap, NR_PALETTE, 0)) { + dev_err(info->dev, "failed to allocate cmap memory\n"); + return -ENOMEM; + } fb_set_cmap(&fb->cmap, fb); ret = (fb->fbops->fb_check_var)(&fb->var, fb); diff --git a/drivers/video/sstfb.c b/drivers/video/sstfb.c index 5b11a00f49bc..609d0a521ca2 100644 --- a/drivers/video/sstfb.c +++ b/drivers/video/sstfb.c @@ -1421,13 +1421,16 @@ static int __devinit sstfb_probe(struct pci_dev *pdev, goto fail; } - fb_alloc_cmap(&info->cmap, 256, 0); + if (fb_alloc_cmap(&info->cmap, 256, 0)) { + printk(KERN_ERR "sstfb: can't alloc cmap memory.\n"); + goto fail; + } /* register fb */ info->device = &pdev->dev; if (register_framebuffer(info) < 0) { printk(KERN_ERR "sstfb: can't register framebuffer.\n"); - goto fail; + goto fail_register; } sstfb_clear_screen(info); @@ -1441,8 +1444,9 @@ static int __devinit sstfb_probe(struct pci_dev *pdev, return 0; -fail: +fail_register: fb_dealloc_cmap(&info->cmap); +fail: iounmap(info->screen_base); fail_fb_remap: iounmap(par->mmio_vbase); diff --git a/drivers/video/stifb.c b/drivers/video/stifb.c index 166481402412..eabaad765aeb 100644 --- a/drivers/video/stifb.c +++ b/drivers/video/stifb.c @@ -1262,24 +1262,25 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) info->flags = FBINFO_DEFAULT; info->pseudo_palette = &fb->pseudo_palette; - /* This has to been done !!! */ - fb_alloc_cmap(&info->cmap, NR_PALETTE, 0); + /* This has to be done !!! */ + if (fb_alloc_cmap(&info->cmap, NR_PALETTE, 0)) + goto out_err1; stifb_init_display(fb); if (!request_mem_region(fix->smem_start, fix->smem_len, "stifb fb")) { printk(KERN_ERR "stifb: cannot reserve fb region 0x%04lx-0x%04lx\n", fix->smem_start, fix->smem_start+fix->smem_len); - goto out_err1; + goto out_err2; } if (!request_mem_region(fix->mmio_start, fix->mmio_len, "stifb mmio")) { printk(KERN_ERR "stifb: cannot reserve sti mmio region 0x%04lx-0x%04lx\n", fix->mmio_start, fix->mmio_start+fix->mmio_len); - goto out_err2; + goto out_err3; } if (register_framebuffer(&fb->info) < 0) - goto out_err3; + goto out_err4; sti->info = info; /* save for unregister_framebuffer() */ @@ -1297,13 +1298,14 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) return 0; -out_err3: +out_err4: release_mem_region(fix->mmio_start, fix->mmio_len); -out_err2: +out_err3: release_mem_region(fix->smem_start, fix->smem_len); +out_err2: + fb_dealloc_cmap(&info->cmap); out_err1: iounmap(info->screen_base); - fb_dealloc_cmap(&info->cmap); out_err0: kfree(fb); return -ENXIO; diff --git a/drivers/video/sunxvr500.c b/drivers/video/sunxvr500.c index c2ba51b7ea18..18b950706cad 100644 --- a/drivers/video/sunxvr500.c +++ b/drivers/video/sunxvr500.c @@ -349,11 +349,14 @@ static int __devinit e3d_pci_register(struct pci_dev *pdev, if (err < 0) { printk(KERN_ERR "e3d: Could not register framebuffer %s\n", pci_name(pdev)); - goto err_unmap_fb; + goto err_free_cmap; } return 0; +err_free_cmap: + fb_dealloc_cmap(&info->cmap); + err_unmap_fb: iounmap(ep->fb_base); @@ -389,6 +392,7 @@ static void __devexit e3d_pci_unregister(struct pci_dev *pdev) pci_release_region(pdev, 0); pci_release_region(pdev, 1); + fb_dealloc_cmap(&info->cmap); framebuffer_release(info); pci_disable_device(pdev); diff --git a/drivers/video/tdfxfb.c b/drivers/video/tdfxfb.c index 14bd3f3680b8..ee64771fbe3d 100644 --- a/drivers/video/tdfxfb.c +++ b/drivers/video/tdfxfb.c @@ -1393,6 +1393,7 @@ static void __devexit tdfxfb_remove(struct pci_dev *pdev) release_mem_region(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); pci_set_drvdata(pdev, NULL); + fb_dealloc_cmap(&info->cmap); framebuffer_release(info); } diff --git a/drivers/video/tgafb.c b/drivers/video/tgafb.c index 680642c089c9..a86046ff60ad 100644 --- a/drivers/video/tgafb.c +++ b/drivers/video/tgafb.c @@ -1663,7 +1663,7 @@ tgafb_register(struct device *dev) if (register_framebuffer(info) < 0) { printk(KERN_ERR "tgafb: Could not register framebuffer\n"); ret = -EINVAL; - goto err1; + goto err2; } if (tga_bus_pci) { @@ -1682,6 +1682,8 @@ tgafb_register(struct device *dev) return 0; + err2: + fb_dealloc_cmap(&info->cmap); err1: if (mem_base) iounmap(mem_base); diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c index 479b2e79ad68..03a9c35e9f55 100644 --- a/drivers/video/tridentfb.c +++ b/drivers/video/tridentfb.c @@ -2,7 +2,7 @@ * Frame buffer driver for Trident TGUI, Blade and Image series * * Copyright 2001, 2002 - Jani Monoses <jani@iv.ro> - * + * Copyright 2009 Krzysztof Helt <krzysztof.h1@wp.pl> * * CREDITS:(in order of appearance) * skeletonfb.c by Geert Uytterhoeven and other fb code in drivers/video @@ -490,7 +490,6 @@ static void tgui_copy_rect(struct tridentfb_par *par, /* * Accel functions called by the upper layers */ -#ifdef CONFIG_FB_TRIDENT_ACCEL static void tridentfb_fillrect(struct fb_info *info, const struct fb_fillrect *fr) { @@ -565,11 +564,6 @@ static int tridentfb_sync(struct fb_info *info) par->wait_engine(par); return 0; } -#else -#define tridentfb_fillrect cfb_fillrect -#define tridentfb_copyarea cfb_copyarea -#define tridentfb_imageblit cfb_imageblit -#endif /* CONFIG_FB_TRIDENT_ACCEL */ /* * Hardware access functions @@ -1333,9 +1327,7 @@ static struct fb_ops tridentfb_ops = { .fb_fillrect = tridentfb_fillrect, .fb_copyarea = tridentfb_copyarea, .fb_imageblit = tridentfb_imageblit, -#ifdef CONFIG_FB_TRIDENT_ACCEL .fb_sync = tridentfb_sync, -#endif }; static int __devinit trident_pci_probe(struct pci_dev *dev, @@ -1359,10 +1351,6 @@ static int __devinit trident_pci_probe(struct pci_dev *dev, chip_id = id->device; -#ifndef CONFIG_FB_TRIDENT_ACCEL - noaccel = 1; -#endif - /* If PCI id is 0x9660 then further detect chip type */ if (chip_id == TGUI9660) { @@ -1490,6 +1478,9 @@ static int __devinit trident_pci_probe(struct pci_dev *dev, } else info->flags |= FBINFO_HWACCEL_DISABLED; + if (is_blade(chip_id) && chip_id != BLADE3D) + info->flags |= FBINFO_READS_FAST; + info->pixmap.addr = kmalloc(4096, GFP_KERNEL); if (!info->pixmap.addr) { err = -ENOMEM; @@ -1563,6 +1554,7 @@ static void __devexit trident_pci_remove(struct pci_dev *dev) release_mem_region(tridentfb_fix.mmio_start, tridentfb_fix.mmio_len); pci_set_drvdata(dev, NULL); kfree(info->pixmap.addr); + fb_dealloc_cmap(&info->cmap); framebuffer_release(info); } @@ -1663,4 +1655,5 @@ module_exit(tridentfb_exit); MODULE_AUTHOR("Jani Monoses <jani@iv.ro>"); MODULE_DESCRIPTION("Framebuffer driver for Trident cards"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("cyblafb"); diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c index 74ae75899009..0b370aebdbfd 100644 --- a/drivers/video/uvesafb.c +++ b/drivers/video/uvesafb.c @@ -189,7 +189,7 @@ static int uvesafb_exec(struct uvesafb_ktask *task) uvfb_tasks[seq] = task; mutex_unlock(&uvfb_lock); - err = cn_netlink_send(m, 0, gfp_any()); + err = cn_netlink_send(m, 0, GFP_KERNEL); if (err == -ESRCH) { /* * Try to start the userspace helper if sending @@ -850,14 +850,16 @@ static int __devinit uvesafb_vbe_init_mode(struct fb_info *info) if (vbemode) { for (i = 0; i < par->vbe_modes_cnt; i++) { if (par->vbe_modes[i].mode_id == vbemode) { + modeid = i; + uvesafb_setup_var(&info->var, info, + &par->vbe_modes[modeid]); fb_get_mode(FB_VSYNCTIMINGS | FB_IGNOREMON, 60, - &info->var, info); + &info->var, info); /* * With pixclock set to 0, the default BIOS * timings will be used in set_par(). */ info->var.pixclock = 0; - modeid = i; goto gotmode; } } @@ -904,8 +906,11 @@ static int __devinit uvesafb_vbe_init_mode(struct fb_info *info) fb_videomode_to_var(&info->var, mode); } else { modeid = par->vbe_modes[0].mode_id; + uvesafb_setup_var(&info->var, info, + &par->vbe_modes[modeid]); fb_get_mode(FB_VSYNCTIMINGS | FB_IGNOREMON, 60, - &info->var, info); + &info->var, info); + goto gotmode; } } @@ -917,9 +922,9 @@ static int __devinit uvesafb_vbe_init_mode(struct fb_info *info) if (modeid == -1) return -EINVAL; -gotmode: uvesafb_setup_var(&info->var, info, &par->vbe_modes[modeid]); +gotmode: /* * If we are not VBE3.0+ compliant, we're done -- the BIOS will * ignore our timings anyway. @@ -1552,7 +1557,7 @@ static void __devinit uvesafb_init_info(struct fb_info *info, } info->flags = FBINFO_FLAG_DEFAULT | - (par->ypan) ? FBINFO_HWACCEL_YPAN : 0; + (par->ypan ? FBINFO_HWACCEL_YPAN : 0); if (!par->ypan) info->fbops->fb_pan_display = NULL; diff --git a/drivers/video/valkyriefb.c b/drivers/video/valkyriefb.c index 7b0cef9ca8f9..4bb9a0b18950 100644 --- a/drivers/video/valkyriefb.c +++ b/drivers/video/valkyriefb.c @@ -119,7 +119,7 @@ static void set_valkyrie_clock(unsigned char *params); static int valkyrie_var_to_par(struct fb_var_screeninfo *var, struct fb_par_valkyrie *par, const struct fb_info *fb_info); -static void valkyrie_init_info(struct fb_info *info, struct fb_info_valkyrie *p); +static int valkyrie_init_info(struct fb_info *info, struct fb_info_valkyrie *p); static void valkyrie_par_to_fix(struct fb_par_valkyrie *par, struct fb_fix_screeninfo *fix); static void valkyrie_init_fix(struct fb_fix_screeninfo *fix, struct fb_info_valkyrie *p); @@ -381,18 +381,22 @@ int __init valkyriefb_init(void) valkyrie_choose_mode(p); mac_vmode_to_var(default_vmode, default_cmode, &p->info.var); - valkyrie_init_info(&p->info, p); + err = valkyrie_init_info(&p->info, p); + if (err < 0) + goto out_free; valkyrie_init_fix(&p->info.fix, p); if (valkyriefb_set_par(&p->info)) /* "can't happen" */ printk(KERN_ERR "valkyriefb: can't set default video mode\n"); if ((err = register_framebuffer(&p->info)) != 0) - goto out_free; + goto out_cmap_free; printk(KERN_INFO "fb%d: valkyrie frame buffer device\n", p->info.node); return 0; + out_cmap_free: + fb_dealloc_cmap(&p->info.cmap); out_free: if (p->frame_buffer) iounmap(p->frame_buffer); @@ -538,14 +542,15 @@ static void valkyrie_par_to_fix(struct fb_par_valkyrie *par, /* ywrapstep, xpanstep, ypanstep */ } -static void __init valkyrie_init_info(struct fb_info *info, struct fb_info_valkyrie *p) +static int __init valkyrie_init_info(struct fb_info *info, + struct fb_info_valkyrie *p) { info->fbops = &valkyriefb_ops; info->screen_base = p->frame_buffer + 0x1000; info->flags = FBINFO_DEFAULT; info->pseudo_palette = p->pseudo_palette; - fb_alloc_cmap(&info->cmap, 256, 0); info->par = &p->par; + return fb_alloc_cmap(&info->cmap, 256, 0); } diff --git a/drivers/video/vesafb.c b/drivers/video/vesafb.c index e16322d157d0..d6856f43d241 100644 --- a/drivers/video/vesafb.c +++ b/drivers/video/vesafb.c @@ -438,7 +438,7 @@ static int __init vesafb_probe(struct platform_device *dev) info->var = vesafb_defined; info->fix = vesafb_fix; info->flags = FBINFO_FLAG_DEFAULT | - (ypan) ? FBINFO_HWACCEL_YPAN : 0; + (ypan ? FBINFO_HWACCEL_YPAN : 0); if (!ypan) info->fbops->fb_pan_display = NULL; diff --git a/drivers/video/vfb.c b/drivers/video/vfb.c index 93fe08d6c78f..cc919ae46571 100644 --- a/drivers/video/vfb.c +++ b/drivers/video/vfb.c @@ -543,6 +543,7 @@ static int vfb_remove(struct platform_device *dev) if (info) { unregister_framebuffer(info); rvfree(videomemory, videomemorysize); + fb_dealloc_cmap(&info->cmap); framebuffer_release(info); } return 0; diff --git a/drivers/video/via/accel.c b/drivers/video/via/accel.c index 632523ff1fb7..45c54bfe99bb 100644 --- a/drivers/video/via/accel.c +++ b/drivers/video/via/accel.c @@ -267,13 +267,17 @@ int viafb_wait_engine_idle(void) int loop = 0; while (!(readl(viaparinfo->io_virt + VIA_REG_STATUS) & - VIA_VR_QUEUE_BUSY) && (loop++ < MAXLOOP)) + VIA_VR_QUEUE_BUSY) && (loop < MAXLOOP)) { + loop++; cpu_relax(); + } while ((readl(viaparinfo->io_virt + VIA_REG_STATUS) & (VIA_CMD_RGTR_BUSY | VIA_2D_ENG_BUSY | VIA_3D_ENG_BUSY)) && - (loop++ < MAXLOOP)) + (loop < MAXLOOP)) { + loop++; cpu_relax(); + } return loop >= MAXLOOP; } diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index a76803108d06..b7ff33c63101 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -186,6 +186,8 @@ int autofs4_expire_wait(struct dentry *dentry); int autofs4_expire_run(struct super_block *, struct vfsmount *, struct autofs_sb_info *, struct autofs_packet_expire __user *); +int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, + struct autofs_sb_info *sbi, int when); int autofs4_expire_multi(struct super_block *, struct vfsmount *, struct autofs_sb_info *, int __user *); struct dentry *autofs4_expire_direct(struct super_block *sb, diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 025e105bffea..9e5ae8a4f5c8 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -525,40 +525,13 @@ static int autofs_dev_ioctl_expire(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { - struct dentry *dentry; struct vfsmount *mnt; - int err = -EAGAIN; int how; how = param->expire.how; mnt = fp->f_path.mnt; - if (autofs_type_trigger(sbi->type)) - dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how); - else - dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how); - - if (dentry) { - struct autofs_info *ino = autofs4_dentry_ino(dentry); - - /* - * This is synchronous because it makes the daemon a - * little easier - */ - err = autofs4_wait(sbi, dentry, NFY_EXPIRE); - - spin_lock(&sbi->fs_lock); - if (ino->flags & AUTOFS_INF_MOUNTPOINT) { - ino->flags &= ~AUTOFS_INF_MOUNTPOINT; - sbi->sb->s_root->d_mounted++; - } - ino->flags &= ~AUTOFS_INF_EXPIRING; - complete_all(&ino->expire_complete); - spin_unlock(&sbi->fs_lock); - dput(dentry); - } - - return err; + return autofs4_do_expire_multi(sbi->sb, mnt, sbi, how); } /* Check if autofs mount point is in use */ diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index e3bd50776f9e..75f7ddacf7d6 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -478,22 +478,16 @@ int autofs4_expire_run(struct super_block *sb, return ret; } -/* Call repeatedly until it returns -EAGAIN, meaning there's nothing - more to be done */ -int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, - struct autofs_sb_info *sbi, int __user *arg) +int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, + struct autofs_sb_info *sbi, int when) { struct dentry *dentry; int ret = -EAGAIN; - int do_now = 0; - - if (arg && get_user(do_now, arg)) - return -EFAULT; if (autofs_type_trigger(sbi->type)) - dentry = autofs4_expire_direct(sb, mnt, sbi, do_now); + dentry = autofs4_expire_direct(sb, mnt, sbi, when); else - dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now); + dentry = autofs4_expire_indirect(sb, mnt, sbi, when); if (dentry) { struct autofs_info *ino = autofs4_dentry_ino(dentry); @@ -516,3 +510,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, return ret; } +/* Call repeatedly until it returns -EAGAIN, meaning there's nothing + more to be done */ +int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, + struct autofs_sb_info *sbi, int __user *arg) +{ + int do_now = 0; + + if (arg && get_user(do_now, arg)) + return -EFAULT; + + return autofs4_do_expire_multi(sb, mnt, sbi, do_now); +} + diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 74b1469a9504..e383bf0334f1 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -485,22 +485,6 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); - expiring = autofs4_lookup_expiring(sbi, dentry->d_parent, &dentry->d_name); - if (expiring) { - /* - * If we are racing with expire the request might not - * be quite complete but the directory has been removed - * so it must have been successful, so just wait for it. - */ - ino = autofs4_dentry_ino(expiring); - autofs4_expire_wait(expiring); - spin_lock(&sbi->lookup_lock); - if (!list_empty(&ino->expiring)) - list_del_init(&ino->expiring); - spin_unlock(&sbi->lookup_lock); - dput(expiring); - } - unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name); if (unhashed) dentry = unhashed; @@ -538,14 +522,31 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s } if (!oz_mode) { + mutex_unlock(&dir->i_mutex); + expiring = autofs4_lookup_expiring(sbi, + dentry->d_parent, + &dentry->d_name); + if (expiring) { + /* + * If we are racing with expire the request might not + * be quite complete but the directory has been removed + * so it must have been successful, so just wait for it. + */ + ino = autofs4_dentry_ino(expiring); + autofs4_expire_wait(expiring); + spin_lock(&sbi->lookup_lock); + if (!list_empty(&ino->expiring)) + list_del_init(&ino->expiring); + spin_unlock(&sbi->lookup_lock); + dput(expiring); + } + spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_AUTOFS_PENDING; spin_unlock(&dentry->d_lock); - if (dentry->d_op && dentry->d_op->d_revalidate) { - mutex_unlock(&dir->i_mutex); + if (dentry->d_op && dentry->d_op->d_revalidate) (dentry->d_op->d_revalidate)(dentry, nd); - mutex_lock(&dir->i_mutex); - } + mutex_lock(&dir->i_mutex); } /* diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d2cf5a54a4b8..9adf5e4f7e96 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -8,7 +8,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ - compression.o + compression.o delayed-ref.o else # Normal Makefile diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 72677ce2b74f..b30986f00b9d 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -66,6 +66,12 @@ struct btrfs_inode { */ struct list_head delalloc_inodes; + /* + * list for tracking inodes that must be sent to disk before a + * rename or truncate commit + */ + struct list_head ordered_operations; + /* the space_info for where this inode's data allocations are done */ struct btrfs_space_info *space_info; @@ -86,12 +92,6 @@ struct btrfs_inode { */ u64 logged_trans; - /* - * trans that last made a change that should be fully fsync'd. This - * gets reset to zero each time the inode is logged - */ - u64 log_dirty_trans; - /* total number of bytes pending delalloc, used by stat to calc the * real block usage of the file */ @@ -121,6 +121,25 @@ struct btrfs_inode { /* the start of block group preferred for allocations. */ u64 block_group; + /* the fsync log has some corner cases that mean we have to check + * directories to see if any unlinks have been done before + * the directory was logged. See tree-log.c for all the + * details + */ + u64 last_unlink_trans; + + /* + * ordered_data_close is set by truncate when a file that used + * to have good data has been truncated to zero. When it is set + * the btrfs file release call will add this inode to the + * ordered operations list so that we make sure to flush out any + * new data the application may have written before commit. + * + * yes, its silly to have a single bitflag, but we might grow more + * of these. + */ + unsigned ordered_data_close:1; + struct inode vfs_inode; }; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 37f31b5529aa..dbb724124633 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -254,18 +254,13 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, * empty_size -- a hint that you plan on doing more cow. This is the size in * bytes the allocator should try to find free next to the block it returns. * This is just a hint and may be ignored by the allocator. - * - * prealloc_dest -- if you have already reserved a destination for the cow, - * this uses that block instead of allocating a new one. - * btrfs_alloc_reserved_extent is used to finish the allocation. */ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret, - u64 search_start, u64 empty_size, - u64 prealloc_dest) + u64 search_start, u64 empty_size) { u64 parent_start; struct extent_buffer *cow; @@ -291,26 +286,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); - if (prealloc_dest) { - struct btrfs_key ins; - - ins.objectid = prealloc_dest; - ins.offset = buf->len; - ins.type = BTRFS_EXTENT_ITEM_KEY; - - ret = btrfs_alloc_reserved_extent(trans, root, parent_start, - root->root_key.objectid, - trans->transid, level, &ins); - BUG_ON(ret); - cow = btrfs_init_new_buffer(trans, root, prealloc_dest, - buf->len, level); - } else { - cow = btrfs_alloc_free_block(trans, root, buf->len, - parent_start, - root->root_key.objectid, - trans->transid, level, - search_start, empty_size); - } + cow = btrfs_alloc_free_block(trans, root, buf->len, + parent_start, root->root_key.objectid, + trans->transid, level, + search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -413,7 +392,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, - struct extent_buffer **cow_ret, u64 prealloc_dest) + struct extent_buffer **cow_ret) { u64 search_start; int ret; @@ -436,7 +415,6 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_header_owner(buf) == root->root_key.objectid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { *cow_ret = buf; - WARN_ON(prealloc_dest); return 0; } @@ -447,8 +425,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_set_lock_blocking(buf); ret = __btrfs_cow_block(trans, root, buf, parent, - parent_slot, cow_ret, search_start, 0, - prealloc_dest); + parent_slot, cow_ret, search_start, 0); return ret; } @@ -617,7 +594,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, err = __btrfs_cow_block(trans, root, cur, parent, i, &cur, search_start, min(16 * blocksize, - (end_slot - i) * blocksize), 0); + (end_slot - i) * blocksize)); if (err) { btrfs_tree_unlock(cur); free_extent_buffer(cur); @@ -937,7 +914,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, BUG_ON(!child); btrfs_tree_lock(child); btrfs_set_lock_blocking(child); - ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); + ret = btrfs_cow_block(trans, root, child, mid, 0, &child); BUG_ON(ret); spin_lock(&root->node_lock); @@ -945,6 +922,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, spin_unlock(&root->node_lock); ret = btrfs_update_extent_ref(trans, root, child->start, + child->len, mid->start, child->start, root->root_key.objectid, trans->transid, level - 1); @@ -971,6 +949,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; + if (trans->transaction->delayed_refs.flushing && + btrfs_header_nritems(mid) > 2) + return 0; + if (btrfs_header_nritems(mid) < 2) err_on_enospc = 1; @@ -979,7 +961,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_lock(left); btrfs_set_lock_blocking(left); wret = btrfs_cow_block(trans, root, left, - parent, pslot - 1, &left, 0); + parent, pslot - 1, &left); if (wret) { ret = wret; goto enospc; @@ -990,7 +972,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_lock(right); btrfs_set_lock_blocking(right); wret = btrfs_cow_block(trans, root, right, - parent, pslot + 1, &right, 0); + parent, pslot + 1, &right); if (wret) { ret = wret; goto enospc; @@ -1171,7 +1153,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, wret = 1; } else { ret = btrfs_cow_block(trans, root, left, parent, - pslot - 1, &left, 0); + pslot - 1, &left); if (ret) wret = 1; else { @@ -1222,7 +1204,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, } else { ret = btrfs_cow_block(trans, root, right, parent, pslot + 1, - &right, 0); + &right); if (ret) wret = 1; else { @@ -1492,7 +1474,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root u8 lowest_level = 0; u64 blocknr; u64 gen; - struct btrfs_key prealloc_block; lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len > 0); @@ -1501,8 +1482,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root if (ins_len < 0) lowest_unlock = 2; - prealloc_block.objectid = 0; - again: if (p->skip_locking) b = btrfs_root_node(root); @@ -1529,44 +1508,11 @@ again: !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { goto cow_done; } - - /* ok, we have to cow, is our old prealloc the right - * size? - */ - if (prealloc_block.objectid && - prealloc_block.offset != b->len) { - btrfs_release_path(root, p); - btrfs_free_reserved_extent(root, - prealloc_block.objectid, - prealloc_block.offset); - prealloc_block.objectid = 0; - goto again; - } - - /* - * for higher level blocks, try not to allocate blocks - * with the block and the parent locks held. - */ - if (level > 0 && !prealloc_block.objectid) { - u32 size = b->len; - u64 hint = b->start; - - btrfs_release_path(root, p); - ret = btrfs_reserve_extent(trans, root, - size, size, 0, - hint, (u64)-1, - &prealloc_block, 0); - BUG_ON(ret); - goto again; - } - btrfs_set_path_blocking(p); wret = btrfs_cow_block(trans, root, b, p->nodes[level + 1], - p->slots[level + 1], - &b, prealloc_block.objectid); - prealloc_block.objectid = 0; + p->slots[level + 1], &b); if (wret) { free_extent_buffer(b); ret = wret; @@ -1742,12 +1688,8 @@ done: * we don't really know what they plan on doing with the path * from here on, so for now just mark it as blocking */ - btrfs_set_path_blocking(p); - if (prealloc_block.objectid) { - btrfs_free_reserved_extent(root, - prealloc_block.objectid, - prealloc_block.offset); - } + if (!p->leave_spinning) + btrfs_set_path_blocking(p); return ret; } @@ -1768,7 +1710,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, int ret; eb = btrfs_lock_root_node(root); - ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); + ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb); BUG_ON(ret); btrfs_set_lock_blocking(eb); @@ -1826,7 +1768,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, } ret = btrfs_cow_block(trans, root, eb, parent, slot, - &eb, 0); + &eb); BUG_ON(ret); if (root->root_key.objectid == @@ -2139,7 +2081,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, spin_unlock(&root->node_lock); ret = btrfs_update_extent_ref(trans, root, lower->start, - lower->start, c->start, + lower->len, lower->start, c->start, root->root_key.objectid, trans->transid, level - 1); BUG_ON(ret); @@ -2221,7 +2163,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, ret = insert_new_root(trans, root, path, level + 1); if (ret) return ret; - } else { + } else if (!trans->transaction->delayed_refs.flushing) { ret = push_nodes_for_insert(trans, root, path, level); c = path->nodes[level]; if (!ret && btrfs_header_nritems(c) < @@ -2329,66 +2271,27 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root, return ret; } -/* - * push some data in the path leaf to the right, trying to free up at - * least data_size bytes. returns zero if the push worked, nonzero otherwise - * - * returns 1 if the push failed because the other node didn't have enough - * room, 0 if everything worked out and < 0 if there were major errors. - */ -static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int data_size, - int empty) +static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + int data_size, int empty, + struct extent_buffer *right, + int free_space, u32 left_nritems) { struct extent_buffer *left = path->nodes[0]; - struct extent_buffer *right; - struct extent_buffer *upper; + struct extent_buffer *upper = path->nodes[1]; struct btrfs_disk_key disk_key; int slot; u32 i; - int free_space; int push_space = 0; int push_items = 0; struct btrfs_item *item; - u32 left_nritems; u32 nr; u32 right_nritems; u32 data_end; u32 this_item_size; int ret; - slot = path->slots[1]; - if (!path->nodes[1]) - return 1; - - upper = path->nodes[1]; - if (slot >= btrfs_header_nritems(upper) - 1) - return 1; - - btrfs_assert_tree_locked(path->nodes[1]); - - right = read_node_slot(root, upper, slot + 1); - btrfs_tree_lock(right); - btrfs_set_lock_blocking(right); - - free_space = btrfs_leaf_free_space(root, right); - if (free_space < data_size) - goto out_unlock; - - /* cow and double check */ - ret = btrfs_cow_block(trans, root, right, upper, - slot + 1, &right, 0); - if (ret) - goto out_unlock; - - free_space = btrfs_leaf_free_space(root, right); - if (free_space < data_size) - goto out_unlock; - - left_nritems = btrfs_header_nritems(left); - if (left_nritems == 0) - goto out_unlock; - if (empty) nr = 0; else @@ -2397,6 +2300,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (path->slots[0] >= left_nritems) push_space += data_size; + slot = path->slots[1]; i = left_nritems - 1; while (i >= nr) { item = btrfs_item_nr(left, i); @@ -2528,24 +2432,82 @@ out_unlock: } /* + * push some data in the path leaf to the right, trying to free up at + * least data_size bytes. returns zero if the push worked, nonzero otherwise + * + * returns 1 if the push failed because the other node didn't have enough + * room, 0 if everything worked out and < 0 if there were major errors. + */ +static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int data_size, + int empty) +{ + struct extent_buffer *left = path->nodes[0]; + struct extent_buffer *right; + struct extent_buffer *upper; + int slot; + int free_space; + u32 left_nritems; + int ret; + + if (!path->nodes[1]) + return 1; + + slot = path->slots[1]; + upper = path->nodes[1]; + if (slot >= btrfs_header_nritems(upper) - 1) + return 1; + + btrfs_assert_tree_locked(path->nodes[1]); + + right = read_node_slot(root, upper, slot + 1); + btrfs_tree_lock(right); + btrfs_set_lock_blocking(right); + + free_space = btrfs_leaf_free_space(root, right); + if (free_space < data_size) + goto out_unlock; + + /* cow and double check */ + ret = btrfs_cow_block(trans, root, right, upper, + slot + 1, &right); + if (ret) + goto out_unlock; + + free_space = btrfs_leaf_free_space(root, right); + if (free_space < data_size) + goto out_unlock; + + left_nritems = btrfs_header_nritems(left); + if (left_nritems == 0) + goto out_unlock; + + return __push_leaf_right(trans, root, path, data_size, empty, + right, free_space, left_nritems); +out_unlock: + btrfs_tree_unlock(right); + free_extent_buffer(right); + return 1; +} + +/* * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise */ -static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int data_size, - int empty) +static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int data_size, + int empty, struct extent_buffer *left, + int free_space, int right_nritems) { struct btrfs_disk_key disk_key; struct extent_buffer *right = path->nodes[0]; - struct extent_buffer *left; int slot; int i; - int free_space; int push_space = 0; int push_items = 0; struct btrfs_item *item; u32 old_left_nritems; - u32 right_nritems; u32 nr; int ret = 0; int wret; @@ -2553,41 +2515,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root u32 old_left_item_size; slot = path->slots[1]; - if (slot == 0) - return 1; - if (!path->nodes[1]) - return 1; - - right_nritems = btrfs_header_nritems(right); - if (right_nritems == 0) - return 1; - - btrfs_assert_tree_locked(path->nodes[1]); - - left = read_node_slot(root, path->nodes[1], slot - 1); - btrfs_tree_lock(left); - btrfs_set_lock_blocking(left); - - free_space = btrfs_leaf_free_space(root, left); - if (free_space < data_size) { - ret = 1; - goto out; - } - - /* cow and double check */ - ret = btrfs_cow_block(trans, root, left, - path->nodes[1], slot - 1, &left, 0); - if (ret) { - /* we hit -ENOSPC, but it isn't fatal here */ - ret = 1; - goto out; - } - - free_space = btrfs_leaf_free_space(root, left); - if (free_space < data_size) { - ret = 1; - goto out; - } if (empty) nr = right_nritems; @@ -2755,6 +2682,154 @@ out: } /* + * push some data in the path leaf to the left, trying to free up at + * least data_size bytes. returns zero if the push worked, nonzero otherwise + */ +static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int data_size, + int empty) +{ + struct extent_buffer *right = path->nodes[0]; + struct extent_buffer *left; + int slot; + int free_space; + u32 right_nritems; + int ret = 0; + + slot = path->slots[1]; + if (slot == 0) + return 1; + if (!path->nodes[1]) + return 1; + + right_nritems = btrfs_header_nritems(right); + if (right_nritems == 0) + return 1; + + btrfs_assert_tree_locked(path->nodes[1]); + + left = read_node_slot(root, path->nodes[1], slot - 1); + btrfs_tree_lock(left); + btrfs_set_lock_blocking(left); + + free_space = btrfs_leaf_free_space(root, left); + if (free_space < data_size) { + ret = 1; + goto out; + } + + /* cow and double check */ + ret = btrfs_cow_block(trans, root, left, + path->nodes[1], slot - 1, &left); + if (ret) { + /* we hit -ENOSPC, but it isn't fatal here */ + ret = 1; + goto out; + } + + free_space = btrfs_leaf_free_space(root, left); + if (free_space < data_size) { + ret = 1; + goto out; + } + + return __push_leaf_left(trans, root, path, data_size, + empty, left, free_space, right_nritems); +out: + btrfs_tree_unlock(left); + free_extent_buffer(left); + return ret; +} + +/* + * split the path's leaf in two, making sure there is at least data_size + * available for the resulting leaf level of the path. + * + * returns 0 if all went well and < 0 on failure. + */ +static noinline int copy_for_split(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *l, + struct extent_buffer *right, + int slot, int mid, int nritems) +{ + int data_copy_size; + int rt_data_off; + int i; + int ret = 0; + int wret; + struct btrfs_disk_key disk_key; + + nritems = nritems - mid; + btrfs_set_header_nritems(right, nritems); + data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); + + copy_extent_buffer(right, l, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(mid), + nritems * sizeof(struct btrfs_item)); + + copy_extent_buffer(right, l, + btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - + data_copy_size, btrfs_leaf_data(l) + + leaf_data_end(root, l), data_copy_size); + + rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - + btrfs_item_end_nr(l, mid); + + for (i = 0; i < nritems; i++) { + struct btrfs_item *item = btrfs_item_nr(right, i); + u32 ioff; + + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + + ioff = btrfs_item_offset(right, item); + btrfs_set_item_offset(right, item, ioff + rt_data_off); + } + + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; + } + + btrfs_set_header_nritems(l, mid); + ret = 0; + btrfs_item_key(right, &disk_key, 0); + wret = insert_ptr(trans, root, path, &disk_key, right->start, + path->slots[1] + 1, 1); + if (wret) + ret = wret; + + btrfs_mark_buffer_dirty(right); + btrfs_mark_buffer_dirty(l); + BUG_ON(path->slots[0] != slot); + + ret = btrfs_update_ref(trans, root, l, right, 0, nritems); + BUG_ON(ret); + + if (mid <= slot) { + btrfs_tree_unlock(path->nodes[0]); + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; + path->slots[0] -= mid; + path->slots[1] += 1; + } else { + btrfs_tree_unlock(right); + free_extent_buffer(right); + } + + BUG_ON(path->slots[0] < 0); + + return ret; +} + +/* * split the path's leaf in two, making sure there is at least data_size * available for the resulting leaf level of the path. * @@ -2771,17 +2846,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, int mid; int slot; struct extent_buffer *right; - int data_copy_size; - int rt_data_off; - int i; int ret = 0; int wret; int double_split; int num_doubles = 0; - struct btrfs_disk_key disk_key; /* first try to make some room by pushing left and right */ - if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { + if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY && + !trans->transaction->delayed_refs.flushing) { wret = push_leaf_right(trans, root, path, data_size, 0); if (wret < 0) return wret; @@ -2830,11 +2902,14 @@ again: write_extent_buffer(right, root->fs_info->chunk_tree_uuid, (unsigned long)btrfs_header_chunk_tree_uuid(right), BTRFS_UUID_SIZE); + if (mid <= slot) { if (nritems == 1 || leaf_space_used(l, mid, nritems - mid) + data_size > BTRFS_LEAF_DATA_SIZE(root)) { if (slot >= nritems) { + struct btrfs_disk_key disk_key; + btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, @@ -2862,6 +2937,8 @@ again: if (leaf_space_used(l, 0, mid) + data_size > BTRFS_LEAF_DATA_SIZE(root)) { if (!extend && data_size && slot == 0) { + struct btrfs_disk_key disk_key; + btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, @@ -2894,76 +2971,16 @@ again: } } } - nritems = nritems - mid; - btrfs_set_header_nritems(right, nritems); - data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); - - copy_extent_buffer(right, l, btrfs_item_nr_offset(0), - btrfs_item_nr_offset(mid), - nritems * sizeof(struct btrfs_item)); - - copy_extent_buffer(right, l, - btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - - data_copy_size, btrfs_leaf_data(l) + - leaf_data_end(root, l), data_copy_size); - - rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_end_nr(l, mid); - - for (i = 0; i < nritems; i++) { - struct btrfs_item *item = btrfs_item_nr(right, i); - u32 ioff; - - if (!right->map_token) { - map_extent_buffer(right, (unsigned long)item, - sizeof(struct btrfs_item), - &right->map_token, &right->kaddr, - &right->map_start, &right->map_len, - KM_USER1); - } - - ioff = btrfs_item_offset(right, item); - btrfs_set_item_offset(right, item, ioff + rt_data_off); - } - - if (right->map_token) { - unmap_extent_buffer(right, right->map_token, KM_USER1); - right->map_token = NULL; - } - - btrfs_set_header_nritems(l, mid); - ret = 0; - btrfs_item_key(right, &disk_key, 0); - wret = insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1); - if (wret) - ret = wret; - - btrfs_mark_buffer_dirty(right); - btrfs_mark_buffer_dirty(l); - BUG_ON(path->slots[0] != slot); - ret = btrfs_update_ref(trans, root, l, right, 0, nritems); + ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems); BUG_ON(ret); - if (mid <= slot) { - btrfs_tree_unlock(path->nodes[0]); - free_extent_buffer(path->nodes[0]); - path->nodes[0] = right; - path->slots[0] -= mid; - path->slots[1] += 1; - } else { - btrfs_tree_unlock(right); - free_extent_buffer(right); - } - - BUG_ON(path->slots[0] < 0); - if (double_split) { BUG_ON(num_doubles != 0); num_doubles++; goto again; } + return ret; } @@ -3021,26 +3038,27 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, return -EAGAIN; } + btrfs_set_path_blocking(path); ret = split_leaf(trans, root, &orig_key, path, sizeof(struct btrfs_item), 1); path->keep_locks = 0; BUG_ON(ret); + btrfs_unlock_up_safe(path, 1); + leaf = path->nodes[0]; + BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); + +split: /* * make sure any changes to the path from split_leaf leave it * in a blocking state */ btrfs_set_path_blocking(path); - leaf = path->nodes[0]; - BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); - -split: item = btrfs_item_nr(leaf, path->slots[0]); orig_offset = btrfs_item_offset(leaf, item); item_size = btrfs_item_size(leaf, item); - buf = kmalloc(item_size, GFP_NOFS); read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, path->slots[0]), item_size); @@ -3445,39 +3463,27 @@ out: } /* - * Given a key and some data, insert items into the tree. - * This does all the path init required, making room in the tree if needed. + * this is a helper for btrfs_insert_empty_items, the main goal here is + * to save stack depth by doing the bulk of the work in a function + * that doesn't call btrfs_search_slot */ -int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_key *cpu_key, u32 *data_size, - int nr) +static noinline_for_stack int +setup_items_for_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 *data_size, + u32 total_data, u32 total_size, int nr) { - struct extent_buffer *leaf; struct btrfs_item *item; - int ret = 0; - int slot; - int slot_orig; int i; u32 nritems; - u32 total_size = 0; - u32 total_data = 0; unsigned int data_end; struct btrfs_disk_key disk_key; + int ret; + struct extent_buffer *leaf; + int slot; - for (i = 0; i < nr; i++) - total_data += data_size[i]; - - total_size = total_data + (nr * sizeof(struct btrfs_item)); - ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); - if (ret == 0) - return -EEXIST; - if (ret < 0) - goto out; - - slot_orig = path->slots[0]; leaf = path->nodes[0]; + slot = path->slots[0]; nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); @@ -3489,9 +3495,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, BUG(); } - slot = path->slots[0]; - BUG_ON(slot < 0); - if (slot != nritems) { unsigned int old_data = btrfs_item_end_nr(leaf, slot); @@ -3547,21 +3550,60 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, data_end -= data_size[i]; btrfs_set_item_size(leaf, item, data_size[i]); } + btrfs_set_header_nritems(leaf, nritems + nr); - btrfs_mark_buffer_dirty(leaf); ret = 0; if (slot == 0) { + struct btrfs_disk_key disk_key; btrfs_cpu_key_to_disk(&disk_key, cpu_key); ret = fixup_low_keys(trans, root, path, &disk_key, 1); } + btrfs_unlock_up_safe(path, 1); + btrfs_mark_buffer_dirty(leaf); if (btrfs_leaf_free_space(root, leaf) < 0) { btrfs_print_leaf(root, leaf); BUG(); } + return ret; +} + +/* + * Given a key and some data, insert items into the tree. + * This does all the path init required, making room in the tree if needed. + */ +int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 *data_size, + int nr) +{ + struct extent_buffer *leaf; + int ret = 0; + int slot; + int i; + u32 total_size = 0; + u32 total_data = 0; + + for (i = 0; i < nr; i++) + total_data += data_size[i]; + + total_size = total_data + (nr * sizeof(struct btrfs_item)); + ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); + if (ret == 0) + return -EEXIST; + if (ret < 0) + goto out; + + leaf = path->nodes[0]; + slot = path->slots[0]; + BUG_ON(slot < 0); + + ret = setup_items_for_insert(trans, root, path, cpu_key, data_size, + total_data, total_size, nr); + out: - btrfs_unlock_up_safe(path, 1); return ret; } @@ -3749,7 +3791,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, } /* delete the leaf if it is mostly empty */ - if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) { + if (used < BTRFS_LEAF_DATA_SIZE(root) / 4 && + !trans->transaction->delayed_refs.flushing) { /* push_leaf_left fixes the path. * make sure the path still points to our leaf * for possible call to del_ptr below @@ -3757,6 +3800,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, slot = path->slots[1]; extent_buffer_get(leaf); + btrfs_set_path_blocking(path); wret = push_leaf_left(trans, root, path, 1, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5e1d4e30e9d8..9417713542a2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -45,6 +45,13 @@ struct btrfs_ordered_sum; #define BTRFS_MAX_LEVEL 8 +/* + * files bigger than this get some pre-flushing when they are added + * to the ordered operations list. That way we limit the total + * work done by the commit + */ +#define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024) + /* holds pointers to all of the tree roots */ #define BTRFS_ROOT_TREE_OBJECTID 1ULL @@ -401,15 +408,16 @@ struct btrfs_path { int locks[BTRFS_MAX_LEVEL]; int reada; /* keep some upper locks as we walk down */ - int keep_locks; - int skip_locking; int lowest_level; /* * set by btrfs_split_item, tells search_slot to keep all locks * and to force calls to keep space in the nodes */ - int search_for_split; + unsigned int search_for_split:1; + unsigned int keep_locks:1; + unsigned int skip_locking:1; + unsigned int leave_spinning:1; }; /* @@ -688,15 +696,18 @@ struct btrfs_fs_info { struct rb_root block_group_cache_tree; struct extent_io_tree pinned_extents; - struct extent_io_tree pending_del; - struct extent_io_tree extent_ins; /* logical->physical extent mapping */ struct btrfs_mapping_tree mapping_tree; u64 generation; u64 last_trans_committed; - u64 last_trans_new_blockgroup; + + /* + * this is updated to the current trans every time a full commit + * is required instead of the faster short fsync log commits + */ + u64 last_trans_log_full_commit; u64 open_ioctl_trans; unsigned long mount_opt; u64 max_extent; @@ -717,12 +728,21 @@ struct btrfs_fs_info { struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; - struct mutex extent_ins_mutex; struct mutex pinned_mutex; struct mutex chunk_mutex; struct mutex drop_mutex; struct mutex volume_mutex; struct mutex tree_reloc_mutex; + + /* + * this protects the ordered operations list only while we are + * processing all of the entries on it. This way we make + * sure the commit code doesn't find the list temporarily empty + * because another function happens to be doing non-waiting preflush + * before jumping into the main commit. + */ + struct mutex ordered_operations_mutex; + struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; @@ -737,10 +757,29 @@ struct btrfs_fs_info { * ordered extents */ spinlock_t ordered_extent_lock; + + /* + * all of the data=ordered extents pending writeback + * these can span multiple transactions and basically include + * every dirty data page that isn't from nodatacow + */ struct list_head ordered_extents; + + /* + * all of the inodes that have delalloc bytes. It is possible for + * this list to be empty even when there is still dirty data=ordered + * extents waiting to finish IO. + */ struct list_head delalloc_inodes; /* + * special rename and truncate targets that must be on disk before + * we're allowed to commit. This is basically the ext3 style + * data=ordered list. + */ + struct list_head ordered_operations; + + /* * there is a pool of worker threads for checksumming during writes * and a pool for checksumming after reads. This is because readers * can run with FS locks held, and the writers may be waiting for @@ -781,6 +820,11 @@ struct btrfs_fs_info { atomic_t throttle_gen; u64 total_pinned; + + /* protected by the delalloc lock, used to keep from writing + * metadata until there is a nice batch + */ + u64 dirty_metadata_bytes; struct list_head dirty_cowonly_roots; struct btrfs_fs_devices *fs_devices; @@ -1704,18 +1748,15 @@ static inline struct dentry *fdentry(struct file *file) } /* extent-tree.c */ +int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, unsigned long count); int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); -int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u32 *refs); int btrfs_update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin); int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 bytenr); -int btrfs_extent_post_op(struct btrfs_trans_handle *trans, - struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group( struct btrfs_fs_info *info, @@ -1777,7 +1818,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, u64 root_objectid, u64 ref_generation, u64 owner_objectid); int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, + struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 orig_parent, u64 parent, u64 root_objectid, u64 ref_generation, u64 owner_objectid); @@ -1838,7 +1879,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, - struct extent_buffer **cow_ret, u64 prealloc_dest); + struct extent_buffer **cow_ret); int btrfs_copy_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, @@ -2060,7 +2101,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index); -int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); +int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); void btrfs_put_inode(struct inode *inode); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c new file mode 100644 index 000000000000..cbf7dc8ae3ec --- /dev/null +++ b/fs/btrfs/delayed-ref.c @@ -0,0 +1,669 @@ +/* + * Copyright (C) 2009 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <linux/sched.h> +#include <linux/sort.h> +#include <linux/ftrace.h> +#include "ctree.h" +#include "delayed-ref.h" +#include "transaction.h" + +/* + * delayed back reference update tracking. For subvolume trees + * we queue up extent allocations and backref maintenance for + * delayed processing. This avoids deep call chains where we + * add extents in the middle of btrfs_search_slot, and it allows + * us to buffer up frequently modified backrefs in an rb tree instead + * of hammering updates on the extent allocation tree. + * + * Right now this code is only used for reference counted trees, but + * the long term goal is to get rid of the similar code for delayed + * extent tree modifications. + */ + +/* + * entries in the rb tree are ordered by the byte number of the extent + * and by the byte number of the parent block. + */ +static int comp_entry(struct btrfs_delayed_ref_node *ref, + u64 bytenr, u64 parent) +{ + if (bytenr < ref->bytenr) + return -1; + if (bytenr > ref->bytenr) + return 1; + if (parent < ref->parent) + return -1; + if (parent > ref->parent) + return 1; + return 0; +} + +/* + * insert a new ref into the rbtree. This returns any existing refs + * for the same (bytenr,parent) tuple, or NULL if the new node was properly + * inserted. + */ +static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, + u64 bytenr, u64 parent, + struct rb_node *node) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent_node = NULL; + struct btrfs_delayed_ref_node *entry; + int cmp; + + while (*p) { + parent_node = *p; + entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, + rb_node); + + cmp = comp_entry(entry, bytenr, parent); + if (cmp < 0) + p = &(*p)->rb_left; + else if (cmp > 0) + p = &(*p)->rb_right; + else + return entry; + } + + entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); + rb_link_node(node, parent_node, p); + rb_insert_color(node, root); + return NULL; +} + +/* + * find an entry based on (bytenr,parent). This returns the delayed + * ref if it was able to find one, or NULL if nothing was in that spot + */ +static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root, + u64 bytenr, u64 parent, + struct btrfs_delayed_ref_node **last) +{ + struct rb_node *n = root->rb_node; + struct btrfs_delayed_ref_node *entry; + int cmp; + + while (n) { + entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); + WARN_ON(!entry->in_tree); + if (last) + *last = entry; + + cmp = comp_entry(entry, bytenr, parent); + if (cmp < 0) + n = n->rb_left; + else if (cmp > 0) + n = n->rb_right; + else + return entry; + } + return NULL; +} + +int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_head *head) +{ + struct btrfs_delayed_ref_root *delayed_refs; + + delayed_refs = &trans->transaction->delayed_refs; + assert_spin_locked(&delayed_refs->lock); + if (mutex_trylock(&head->mutex)) + return 0; + + atomic_inc(&head->node.refs); + spin_unlock(&delayed_refs->lock); + + mutex_lock(&head->mutex); + spin_lock(&delayed_refs->lock); + if (!head->node.in_tree) { + mutex_unlock(&head->mutex); + btrfs_put_delayed_ref(&head->node); + return -EAGAIN; + } + btrfs_put_delayed_ref(&head->node); + return 0; +} + +int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, + struct list_head *cluster, u64 start) +{ + int count = 0; + struct btrfs_delayed_ref_root *delayed_refs; + struct rb_node *node; + struct btrfs_delayed_ref_node *ref; + struct btrfs_delayed_ref_head *head; + + delayed_refs = &trans->transaction->delayed_refs; + if (start == 0) { + node = rb_first(&delayed_refs->root); + } else { + ref = NULL; + tree_search(&delayed_refs->root, start, (u64)-1, &ref); + if (ref) { + struct btrfs_delayed_ref_node *tmp; + + node = rb_prev(&ref->rb_node); + while (node) { + tmp = rb_entry(node, + struct btrfs_delayed_ref_node, + rb_node); + if (tmp->bytenr < start) + break; + ref = tmp; + node = rb_prev(&ref->rb_node); + } + node = &ref->rb_node; + } else + node = rb_first(&delayed_refs->root); + } +again: + while (node && count < 32) { + ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); + if (btrfs_delayed_ref_is_head(ref)) { + head = btrfs_delayed_node_to_head(ref); + if (list_empty(&head->cluster)) { + list_add_tail(&head->cluster, cluster); + delayed_refs->run_delayed_start = + head->node.bytenr; + count++; + + WARN_ON(delayed_refs->num_heads_ready == 0); + delayed_refs->num_heads_ready--; + } else if (count) { + /* the goal of the clustering is to find extents + * that are likely to end up in the same extent + * leaf on disk. So, we don't want them spread + * all over the tree. Stop now if we've hit + * a head that was already in use + */ + break; + } + } + node = rb_next(node); + } + if (count) { + return 0; + } else if (start) { + /* + * we've gone to the end of the rbtree without finding any + * clusters. start from the beginning and try again + */ + start = 0; + node = rb_first(&delayed_refs->root); + goto again; + } + return 1; +} + +/* + * This checks to see if there are any delayed refs in the + * btree for a given bytenr. It returns one if it finds any + * and zero otherwise. + * + * If it only finds a head node, it returns 0. + * + * The idea is to use this when deciding if you can safely delete an + * extent from the extent allocation tree. There may be a pending + * ref in the rbtree that adds or removes references, so as long as this + * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent + * allocation tree. + */ +int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr) +{ + struct btrfs_delayed_ref_node *ref; + struct btrfs_delayed_ref_root *delayed_refs; + struct rb_node *prev_node; + int ret = 0; + + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + + ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); + if (ref) { + prev_node = rb_prev(&ref->rb_node); + if (!prev_node) + goto out; + ref = rb_entry(prev_node, struct btrfs_delayed_ref_node, + rb_node); + if (ref->bytenr == bytenr) + ret = 1; + } +out: + spin_unlock(&delayed_refs->lock); + return ret; +} + +/* + * helper function to lookup reference count + * + * the head node for delayed ref is used to store the sum of all the + * reference count modifications queued up in the rbtree. This way you + * can check to see what the reference count would be if all of the + * delayed refs are processed. + */ +int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u32 *refs) +{ + struct btrfs_delayed_ref_node *ref; + struct btrfs_delayed_ref_head *head; + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_extent_item *ei; + struct btrfs_key key; + u32 num_refs; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + delayed_refs = &trans->transaction->delayed_refs; +again: + ret = btrfs_search_slot(trans, root->fs_info->extent_root, + &key, path, 0, 0); + if (ret < 0) + goto out; + + if (ret == 0) { + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item); + num_refs = btrfs_extent_refs(leaf, ei); + } else { + num_refs = 0; + ret = 0; + } + + spin_lock(&delayed_refs->lock); + ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); + if (ref) { + head = btrfs_delayed_node_to_head(ref); + if (mutex_trylock(&head->mutex)) { + num_refs += ref->ref_mod; + mutex_unlock(&head->mutex); + *refs = num_refs; + goto out; + } + + atomic_inc(&ref->refs); + spin_unlock(&delayed_refs->lock); + + btrfs_release_path(root->fs_info->extent_root, path); + + mutex_lock(&head->mutex); + mutex_unlock(&head->mutex); + btrfs_put_delayed_ref(ref); + goto again; + } else { + *refs = num_refs; + } +out: + spin_unlock(&delayed_refs->lock); + btrfs_free_path(path); + return ret; +} + +/* + * helper function to update an extent delayed ref in the + * rbtree. existing and update must both have the same + * bytenr and parent + * + * This may free existing if the update cancels out whatever + * operation it was doing. + */ +static noinline void +update_existing_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_node *existing, + struct btrfs_delayed_ref_node *update) +{ + struct btrfs_delayed_ref *existing_ref; + struct btrfs_delayed_ref *ref; + + existing_ref = btrfs_delayed_node_to_ref(existing); + ref = btrfs_delayed_node_to_ref(update); + + if (ref->pin) + existing_ref->pin = 1; + + if (ref->action != existing_ref->action) { + /* + * this is effectively undoing either an add or a + * drop. We decrement the ref_mod, and if it goes + * down to zero we just delete the entry without + * every changing the extent allocation tree. + */ + existing->ref_mod--; + if (existing->ref_mod == 0) { + rb_erase(&existing->rb_node, + &delayed_refs->root); + existing->in_tree = 0; + btrfs_put_delayed_ref(existing); + delayed_refs->num_entries--; + if (trans->delayed_ref_updates) + trans->delayed_ref_updates--; + } + } else { + if (existing_ref->action == BTRFS_ADD_DELAYED_REF) { + /* if we're adding refs, make sure all the + * details match up. The extent could + * have been totally freed and reallocated + * by a different owner before the delayed + * ref entries were removed. + */ + existing_ref->owner_objectid = ref->owner_objectid; + existing_ref->generation = ref->generation; + existing_ref->root = ref->root; + existing->num_bytes = update->num_bytes; + } + /* + * the action on the existing ref matches + * the action on the ref we're trying to add. + * Bump the ref_mod by one so the backref that + * is eventually added/removed has the correct + * reference count + */ + existing->ref_mod += update->ref_mod; + } +} + +/* + * helper function to update the accounting in the head ref + * existing and update must have the same bytenr + */ +static noinline void +update_existing_head_ref(struct btrfs_delayed_ref_node *existing, + struct btrfs_delayed_ref_node *update) +{ + struct btrfs_delayed_ref_head *existing_ref; + struct btrfs_delayed_ref_head *ref; + + existing_ref = btrfs_delayed_node_to_head(existing); + ref = btrfs_delayed_node_to_head(update); + + if (ref->must_insert_reserved) { + /* if the extent was freed and then + * reallocated before the delayed ref + * entries were processed, we can end up + * with an existing head ref without + * the must_insert_reserved flag set. + * Set it again here + */ + existing_ref->must_insert_reserved = ref->must_insert_reserved; + + /* + * update the num_bytes so we make sure the accounting + * is done correctly + */ + existing->num_bytes = update->num_bytes; + + } + + /* + * update the reference mod on the head to reflect this new operation + */ + existing->ref_mod += update->ref_mod; +} + +/* + * helper function to actually insert a delayed ref into the rbtree. + * this does all the dirty work in terms of maintaining the correct + * overall modification count in the head node and properly dealing + * with updating existing nodes as new modifications are queued. + */ +static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_node *ref, + u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, + u64 ref_generation, u64 owner_objectid, int action, + int pin) +{ + struct btrfs_delayed_ref_node *existing; + struct btrfs_delayed_ref *full_ref; + struct btrfs_delayed_ref_head *head_ref = NULL; + struct btrfs_delayed_ref_root *delayed_refs; + int count_mod = 1; + int must_insert_reserved = 0; + + /* + * the head node stores the sum of all the mods, so dropping a ref + * should drop the sum in the head node by one. + */ + if (parent == (u64)-1) { + if (action == BTRFS_DROP_DELAYED_REF) + count_mod = -1; + else if (action == BTRFS_UPDATE_DELAYED_HEAD) + count_mod = 0; + } + + /* + * BTRFS_ADD_DELAYED_EXTENT means that we need to update + * the reserved accounting when the extent is finally added, or + * if a later modification deletes the delayed ref without ever + * inserting the extent into the extent allocation tree. + * ref->must_insert_reserved is the flag used to record + * that accounting mods are required. + * + * Once we record must_insert_reserved, switch the action to + * BTRFS_ADD_DELAYED_REF because other special casing is not required. + */ + if (action == BTRFS_ADD_DELAYED_EXTENT) { + must_insert_reserved = 1; + action = BTRFS_ADD_DELAYED_REF; + } else { + must_insert_reserved = 0; + } + + + delayed_refs = &trans->transaction->delayed_refs; + + /* first set the basic ref node struct up */ + atomic_set(&ref->refs, 1); + ref->bytenr = bytenr; + ref->parent = parent; + ref->ref_mod = count_mod; + ref->in_tree = 1; + ref->num_bytes = num_bytes; + + if (btrfs_delayed_ref_is_head(ref)) { + head_ref = btrfs_delayed_node_to_head(ref); + head_ref->must_insert_reserved = must_insert_reserved; + INIT_LIST_HEAD(&head_ref->cluster); + mutex_init(&head_ref->mutex); + } else { + full_ref = btrfs_delayed_node_to_ref(ref); + full_ref->root = ref_root; + full_ref->generation = ref_generation; + full_ref->owner_objectid = owner_objectid; + full_ref->pin = pin; + full_ref->action = action; + } + + existing = tree_insert(&delayed_refs->root, bytenr, + parent, &ref->rb_node); + + if (existing) { + if (btrfs_delayed_ref_is_head(ref)) + update_existing_head_ref(existing, ref); + else + update_existing_ref(trans, delayed_refs, existing, ref); + + /* + * we've updated the existing ref, free the newly + * allocated ref + */ + kfree(ref); + } else { + if (btrfs_delayed_ref_is_head(ref)) { + delayed_refs->num_heads++; + delayed_refs->num_heads_ready++; + } + delayed_refs->num_entries++; + trans->delayed_ref_updates++; + } + return 0; +} + +/* + * add a delayed ref to the tree. This does all of the accounting required + * to make sure the delayed ref is eventually processed before this + * transaction commits. + */ +int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, + u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, + u64 ref_generation, u64 owner_objectid, int action, + int pin) +{ + struct btrfs_delayed_ref *ref; + struct btrfs_delayed_ref_head *head_ref; + struct btrfs_delayed_ref_root *delayed_refs; + int ret; + + ref = kmalloc(sizeof(*ref), GFP_NOFS); + if (!ref) + return -ENOMEM; + + /* + * the parent = 0 case comes from cases where we don't actually + * know the parent yet. It will get updated later via a add/drop + * pair. + */ + if (parent == 0) + parent = bytenr; + + head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); + if (!head_ref) { + kfree(ref); + return -ENOMEM; + } + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + + /* + * insert both the head node and the new ref without dropping + * the spin lock + */ + ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, + (u64)-1, 0, 0, 0, action, pin); + BUG_ON(ret); + + ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, + parent, ref_root, ref_generation, + owner_objectid, action, pin); + BUG_ON(ret); + spin_unlock(&delayed_refs->lock); + return 0; +} + +/* + * this does a simple search for the head node for a given extent. + * It must be called with the delayed ref spinlock held, and it returns + * the head node if any where found, or NULL if not. + */ +struct btrfs_delayed_ref_head * +btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) +{ + struct btrfs_delayed_ref_node *ref; + struct btrfs_delayed_ref_root *delayed_refs; + + delayed_refs = &trans->transaction->delayed_refs; + ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); + if (ref) + return btrfs_delayed_node_to_head(ref); + return NULL; +} + +/* + * add a delayed ref to the tree. This does all of the accounting required + * to make sure the delayed ref is eventually processed before this + * transaction commits. + * + * The main point of this call is to add and remove a backreference in a single + * shot, taking the lock only once, and only searching for the head node once. + * + * It is the same as doing a ref add and delete in two separate calls. + */ +int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, + u64 bytenr, u64 num_bytes, u64 orig_parent, + u64 parent, u64 orig_ref_root, u64 ref_root, + u64 orig_ref_generation, u64 ref_generation, + u64 owner_objectid, int pin) +{ + struct btrfs_delayed_ref *ref; + struct btrfs_delayed_ref *old_ref; + struct btrfs_delayed_ref_head *head_ref; + struct btrfs_delayed_ref_root *delayed_refs; + int ret; + + ref = kmalloc(sizeof(*ref), GFP_NOFS); + if (!ref) + return -ENOMEM; + + old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS); + if (!old_ref) { + kfree(ref); + return -ENOMEM; + } + + /* + * the parent = 0 case comes from cases where we don't actually + * know the parent yet. It will get updated later via a add/drop + * pair. + */ + if (parent == 0) + parent = bytenr; + if (orig_parent == 0) + orig_parent = bytenr; + + head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); + if (!head_ref) { + kfree(ref); + kfree(old_ref); + return -ENOMEM; + } + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + + /* + * insert both the head node and the new ref without dropping + * the spin lock + */ + ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, + (u64)-1, 0, 0, 0, + BTRFS_UPDATE_DELAYED_HEAD, 0); + BUG_ON(ret); + + ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, + parent, ref_root, ref_generation, + owner_objectid, BTRFS_ADD_DELAYED_REF, 0); + BUG_ON(ret); + + ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes, + orig_parent, orig_ref_root, + orig_ref_generation, owner_objectid, + BTRFS_DROP_DELAYED_REF, pin); + BUG_ON(ret); + spin_unlock(&delayed_refs->lock); + return 0; +} diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h new file mode 100644 index 000000000000..3bec2ff0b15c --- /dev/null +++ b/fs/btrfs/delayed-ref.h @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#ifndef __DELAYED_REF__ +#define __DELAYED_REF__ + +/* these are the possible values of struct btrfs_delayed_ref->action */ +#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ +#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ +#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ +#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */ + +struct btrfs_delayed_ref_node { + struct rb_node rb_node; + + /* the starting bytenr of the extent */ + u64 bytenr; + + /* the parent our backref will point to */ + u64 parent; + + /* the size of the extent */ + u64 num_bytes; + + /* ref count on this data structure */ + atomic_t refs; + + /* + * how many refs is this entry adding or deleting. For + * head refs, this may be a negative number because it is keeping + * track of the total mods done to the reference count. + * For individual refs, this will always be a positive number + * + * It may be more than one, since it is possible for a single + * parent to have more than one ref on an extent + */ + int ref_mod; + + /* is this node still in the rbtree? */ + unsigned int in_tree:1; +}; + +/* + * the head refs are used to hold a lock on a given extent, which allows us + * to make sure that only one process is running the delayed refs + * at a time for a single extent. They also store the sum of all the + * reference count modifications we've queued up. + */ +struct btrfs_delayed_ref_head { + struct btrfs_delayed_ref_node node; + + /* + * the mutex is held while running the refs, and it is also + * held when checking the sum of reference modifications. + */ + struct mutex mutex; + + struct list_head cluster; + + /* + * when a new extent is allocated, it is just reserved in memory + * The actual extent isn't inserted into the extent allocation tree + * until the delayed ref is processed. must_insert_reserved is + * used to flag a delayed ref so the accounting can be updated + * when a full insert is done. + * + * It is possible the extent will be freed before it is ever + * inserted into the extent allocation tree. In this case + * we need to update the in ram accounting to properly reflect + * the free has happened. + */ + unsigned int must_insert_reserved:1; +}; + +struct btrfs_delayed_ref { + struct btrfs_delayed_ref_node node; + + /* the root objectid our ref will point to */ + u64 root; + + /* the generation for the backref */ + u64 generation; + + /* owner_objectid of the backref */ + u64 owner_objectid; + + /* operation done by this entry in the rbtree */ + u8 action; + + /* if pin == 1, when the extent is freed it will be pinned until + * transaction commit + */ + unsigned int pin:1; +}; + +struct btrfs_delayed_ref_root { + struct rb_root root; + + /* this spin lock protects the rbtree and the entries inside */ + spinlock_t lock; + + /* how many delayed ref updates we've queued, used by the + * throttling code + */ + unsigned long num_entries; + + /* total number of head nodes in tree */ + unsigned long num_heads; + + /* total number of head nodes ready for processing */ + unsigned long num_heads_ready; + + /* + * set when the tree is flushing before a transaction commit, + * used by the throttling code to decide if new updates need + * to be run right away + */ + int flushing; + + u64 run_delayed_start; +}; + +static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) +{ + WARN_ON(atomic_read(&ref->refs) == 0); + if (atomic_dec_and_test(&ref->refs)) { + WARN_ON(ref->in_tree); + kfree(ref); + } +} + +int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, + u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, + u64 ref_generation, u64 owner_objectid, int action, + int pin); + +struct btrfs_delayed_ref_head * +btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); +int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); +int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u32 *refs); +int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, + u64 bytenr, u64 num_bytes, u64 orig_parent, + u64 parent, u64 orig_ref_root, u64 ref_root, + u64 orig_ref_generation, u64 ref_generation, + u64 owner_objectid, int pin); +int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_head *head); +int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, + struct list_head *cluster, u64 search_start); +/* + * a node might live in a head or a regular ref, this lets you + * test for the proper type to use. + */ +static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node) +{ + return node->parent == (u64)-1; +} + +/* + * helper functions to cast a node into its container + */ +static inline struct btrfs_delayed_ref * +btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node) +{ + WARN_ON(btrfs_delayed_ref_is_head(node)); + return container_of(node, struct btrfs_delayed_ref, node); + +} + +static inline struct btrfs_delayed_ref_head * +btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node) +{ + WARN_ON(!btrfs_delayed_ref_is_head(node)); + return container_of(node, struct btrfs_delayed_ref_head, node); + +} +#endif diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 926a0b287a7d..1d70236ba00c 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -145,7 +145,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root key.objectid = dir; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); key.offset = btrfs_name_hash(name, name_len); + path = btrfs_alloc_path(); + path->leave_spinning = 1; + data_size = sizeof(*dir_item) + name_len; dir_item = insert_with_overflow(trans, root, path, &key, data_size, name, name_len); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6ec80c0fc869..92d73929d381 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -668,14 +668,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_io_tree *tree; + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + struct extent_buffer *eb; + int was_dirty; + tree = &BTRFS_I(page->mapping->host)->io_tree; + if (!(current->flags & PF_MEMALLOC)) { + return extent_write_full_page(tree, page, + btree_get_extent, wbc); + } - if (current->flags & PF_MEMALLOC) { - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; + redirty_page_for_writepage(wbc, page); + eb = btrfs_find_tree_block(root, page_offset(page), + PAGE_CACHE_SIZE); + WARN_ON(!eb); + + was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); + if (!was_dirty) { + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE; + spin_unlock(&root->fs_info->delalloc_lock); } - return extent_write_full_page(tree, page, btree_get_extent, wbc); + free_extent_buffer(eb); + + unlock_page(page); + return 0; } static int btree_writepages(struct address_space *mapping, @@ -684,15 +701,15 @@ static int btree_writepages(struct address_space *mapping, struct extent_io_tree *tree; tree = &BTRFS_I(mapping->host)->io_tree; if (wbc->sync_mode == WB_SYNC_NONE) { + struct btrfs_root *root = BTRFS_I(mapping->host)->root; u64 num_dirty; - u64 start = 0; unsigned long thresh = 32 * 1024 * 1024; if (wbc->for_kupdate) return 0; - num_dirty = count_range_bits(tree, &start, (u64)-1, - thresh, EXTENT_DIRTY); + /* this is a bit racy, but that's ok */ + num_dirty = root->fs_info->dirty_metadata_bytes; if (num_dirty < thresh) return 0; } @@ -859,9 +876,17 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, root->fs_info->running_transaction->transid) { btrfs_assert_tree_locked(buf); - /* ugh, clear_extent_buffer_dirty can be expensive */ - btrfs_set_lock_blocking(buf); + if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { + spin_lock(&root->fs_info->delalloc_lock); + if (root->fs_info->dirty_metadata_bytes >= buf->len) + root->fs_info->dirty_metadata_bytes -= buf->len; + else + WARN_ON(1); + spin_unlock(&root->fs_info->delalloc_lock); + } + /* ugh, clear_extent_buffer_dirty needs to lock the page */ + btrfs_set_lock_blocking(buf); clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); } @@ -1471,12 +1496,6 @@ static int transaction_kthread(void *arg) vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); - if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { - printk(KERN_INFO "btrfs: total reference cache " - "size %llu\n", - root->fs_info->total_ref_cache_size); - } - mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; if (!cur) { @@ -1493,6 +1512,7 @@ static int transaction_kthread(void *arg) mutex_unlock(&root->fs_info->trans_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); + sleep: wake_up_process(root->fs_info->cleaner_kthread); mutex_unlock(&root->fs_info->transaction_kthread_mutex); @@ -1552,6 +1572,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); INIT_LIST_HEAD(&fs_info->delalloc_inodes); + INIT_LIST_HEAD(&fs_info->ordered_operations); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); spin_lock_init(&fs_info->ref_cache_lock); @@ -1611,10 +1632,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, extent_io_tree_init(&fs_info->pinned_extents, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&fs_info->pending_del, - fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&fs_info->extent_ins, - fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; INIT_LIST_HEAD(&fs_info->dead_reloc_roots); @@ -1627,9 +1644,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, insert_inode_hash(fs_info->btree_inode); mutex_init(&fs_info->trans_mutex); + mutex_init(&fs_info->ordered_operations_mutex); mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->drop_mutex); - mutex_init(&fs_info->extent_ins_mutex); mutex_init(&fs_info->pinned_mutex); mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); @@ -2358,8 +2375,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; - - btrfs_set_lock_blocking(buf); + int was_dirty; btrfs_assert_tree_locked(buf); if (transid != root->fs_info->generation) { @@ -2370,7 +2386,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) (unsigned long long)root->fs_info->generation); WARN_ON(1); } - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); + was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, + buf); + if (!was_dirty) { + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->dirty_metadata_bytes += buf->len; + spin_unlock(&root->fs_info->delalloc_lock); + } } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) @@ -2410,6 +2432,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) int btree_lock_page_hook(struct page *page) { struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_buffer *eb; unsigned long len; @@ -2425,6 +2448,16 @@ int btree_lock_page_hook(struct page *page) btrfs_tree_lock(eb); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); + + if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { + spin_lock(&root->fs_info->delalloc_lock); + if (root->fs_info->dirty_metadata_bytes >= eb->len) + root->fs_info->dirty_metadata_bytes -= eb->len; + else + WARN_ON(1); + spin_unlock(&root->fs_info->delalloc_lock); + } + btrfs_tree_unlock(eb); free_extent_buffer(eb); out: diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 95029db227be..c958ecbc1916 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -72,6 +72,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_mark_buffer_dirty(struct extent_buffer *buf); +void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf); int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); int btrfs_set_buffer_uptodate(struct extent_buffer *buf); int wait_on_tree_block_writeback(struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fefe83ad2059..f5e7cae63d80 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -49,17 +49,23 @@ struct pending_extent_op { int del; }; -static int finish_current_insert(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, int all); -static int del_pending_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, int all); -static int pin_down_bytes(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, int is_data); +static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner, struct btrfs_key *ins, + int ref_mod); +static int update_reserved_extents(struct btrfs_root *root, + u64 bytenr, u64 num, int reserve); static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, int mark_free); +static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid, int pin, + int ref_to_drop); static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, @@ -554,262 +560,13 @@ out: return ret; } -/* - * updates all the backrefs that are pending on update_list for the - * extent_root - */ -static noinline int update_backrefs(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, - struct btrfs_path *path, - struct list_head *update_list) -{ - struct btrfs_key key; - struct btrfs_extent_ref *ref; - struct btrfs_fs_info *info = extent_root->fs_info; - struct pending_extent_op *op; - struct extent_buffer *leaf; - int ret = 0; - struct list_head *cur = update_list->next; - u64 ref_objectid; - u64 ref_root = extent_root->root_key.objectid; - - op = list_entry(cur, struct pending_extent_op, list); - -search: - key.objectid = op->bytenr; - key.type = BTRFS_EXTENT_REF_KEY; - key.offset = op->orig_parent; - - ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1); - BUG_ON(ret); - - leaf = path->nodes[0]; - -loop: - ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); - - ref_objectid = btrfs_ref_objectid(leaf, ref); - - if (btrfs_ref_root(leaf, ref) != ref_root || - btrfs_ref_generation(leaf, ref) != op->orig_generation || - (ref_objectid != op->level && - ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { - printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, " - "root %llu, owner %u\n", - (unsigned long long)op->bytenr, - (unsigned long long)op->orig_parent, - (unsigned long long)ref_root, op->level); - btrfs_print_leaf(extent_root, leaf); - BUG(); - } - - key.objectid = op->bytenr; - key.offset = op->parent; - key.type = BTRFS_EXTENT_REF_KEY; - ret = btrfs_set_item_key_safe(trans, extent_root, path, &key); - BUG_ON(ret); - ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); - btrfs_set_ref_generation(leaf, ref, op->generation); - - cur = cur->next; - - list_del_init(&op->list); - unlock_extent(&info->extent_ins, op->bytenr, - op->bytenr + op->num_bytes - 1, GFP_NOFS); - kfree(op); - - if (cur == update_list) { - btrfs_mark_buffer_dirty(path->nodes[0]); - btrfs_release_path(extent_root, path); - goto out; - } - - op = list_entry(cur, struct pending_extent_op, list); - - path->slots[0]++; - while (path->slots[0] < btrfs_header_nritems(leaf)) { - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid == op->bytenr && - key.type == BTRFS_EXTENT_REF_KEY) - goto loop; - path->slots[0]++; - } - - btrfs_mark_buffer_dirty(path->nodes[0]); - btrfs_release_path(extent_root, path); - goto search; - -out: - return 0; -} - -static noinline int insert_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, - struct btrfs_path *path, - struct list_head *insert_list, int nr) -{ - struct btrfs_key *keys; - u32 *data_size; - struct pending_extent_op *op; - struct extent_buffer *leaf; - struct list_head *cur = insert_list->next; - struct btrfs_fs_info *info = extent_root->fs_info; - u64 ref_root = extent_root->root_key.objectid; - int i = 0, last = 0, ret; - int total = nr * 2; - - if (!nr) - return 0; - - keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS); - if (!keys) - return -ENOMEM; - - data_size = kzalloc(total * sizeof(u32), GFP_NOFS); - if (!data_size) { - kfree(keys); - return -ENOMEM; - } - - list_for_each_entry(op, insert_list, list) { - keys[i].objectid = op->bytenr; - keys[i].offset = op->num_bytes; - keys[i].type = BTRFS_EXTENT_ITEM_KEY; - data_size[i] = sizeof(struct btrfs_extent_item); - i++; - - keys[i].objectid = op->bytenr; - keys[i].offset = op->parent; - keys[i].type = BTRFS_EXTENT_REF_KEY; - data_size[i] = sizeof(struct btrfs_extent_ref); - i++; - } - - op = list_entry(cur, struct pending_extent_op, list); - i = 0; - while (i < total) { - int c; - ret = btrfs_insert_some_items(trans, extent_root, path, - keys+i, data_size+i, total-i); - BUG_ON(ret < 0); - - if (last && ret > 1) - BUG(); - - leaf = path->nodes[0]; - for (c = 0; c < ret; c++) { - int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY; - - /* - * if the first item we inserted was a backref, then - * the EXTENT_ITEM will be the odd c's, else it will - * be the even c's - */ - if ((ref_first && (c % 2)) || - (!ref_first && !(c % 2))) { - struct btrfs_extent_item *itm; - - itm = btrfs_item_ptr(leaf, path->slots[0] + c, - struct btrfs_extent_item); - btrfs_set_extent_refs(path->nodes[0], itm, 1); - op->del++; - } else { - struct btrfs_extent_ref *ref; - - ref = btrfs_item_ptr(leaf, path->slots[0] + c, - struct btrfs_extent_ref); - btrfs_set_ref_root(leaf, ref, ref_root); - btrfs_set_ref_generation(leaf, ref, - op->generation); - btrfs_set_ref_objectid(leaf, ref, op->level); - btrfs_set_ref_num_refs(leaf, ref, 1); - op->del++; - } - - /* - * using del to see when its ok to free up the - * pending_extent_op. In the case where we insert the - * last item on the list in order to help do batching - * we need to not free the extent op until we actually - * insert the extent_item - */ - if (op->del == 2) { - unlock_extent(&info->extent_ins, op->bytenr, - op->bytenr + op->num_bytes - 1, - GFP_NOFS); - cur = cur->next; - list_del_init(&op->list); - kfree(op); - if (cur != insert_list) - op = list_entry(cur, - struct pending_extent_op, - list); - } - } - btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(extent_root, path); - - /* - * Ok backref's and items usually go right next to eachother, - * but if we could only insert 1 item that means that we - * inserted on the end of a leaf, and we have no idea what may - * be on the next leaf so we just play it safe. In order to - * try and help this case we insert the last thing on our - * insert list so hopefully it will end up being the last - * thing on the leaf and everything else will be before it, - * which will let us insert a whole bunch of items at the same - * time. - */ - if (ret == 1 && !last && (i + ret < total)) { - /* - * last: where we will pick up the next time around - * i: our current key to insert, will be total - 1 - * cur: the current op we are screwing with - * op: duh - */ - last = i + ret; - i = total - 1; - cur = insert_list->prev; - op = list_entry(cur, struct pending_extent_op, list); - } else if (last) { - /* - * ok we successfully inserted the last item on the - * list, lets reset everything - * - * i: our current key to insert, so where we left off - * last time - * last: done with this - * cur: the op we are messing with - * op: duh - * total: since we inserted the last key, we need to - * decrement total so we dont overflow - */ - i = last; - last = 0; - total--; - if (i < total) { - cur = insert_list->next; - op = list_entry(cur, struct pending_extent_op, - list); - } - } else { - i += ret; - } - - cond_resched(); - } - ret = 0; - kfree(keys); - kfree(data_size); - return ret; -} - static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, u64 parent, u64 ref_root, u64 ref_generation, - u64 owner_objectid) + u64 owner_objectid, + int refs_to_add) { struct btrfs_key key; struct extent_buffer *leaf; @@ -829,9 +586,10 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, btrfs_set_ref_root(leaf, ref, ref_root); btrfs_set_ref_generation(leaf, ref, ref_generation); btrfs_set_ref_objectid(leaf, ref, owner_objectid); - btrfs_set_ref_num_refs(leaf, ref, 1); + btrfs_set_ref_num_refs(leaf, ref, refs_to_add); } else if (ret == -EEXIST) { u64 existing_owner; + BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], @@ -845,7 +603,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, num_refs = btrfs_ref_num_refs(leaf, ref); BUG_ON(num_refs == 0); - btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); + btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add); existing_owner = btrfs_ref_objectid(leaf, ref); if (existing_owner != owner_objectid && @@ -857,6 +615,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, } else { goto out; } + btrfs_unlock_up_safe(path, 1); btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_release_path(root, path); @@ -865,7 +624,8 @@ out: static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path) + struct btrfs_path *path, + int refs_to_drop) { struct extent_buffer *leaf; struct btrfs_extent_ref *ref; @@ -875,8 +635,8 @@ static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); num_refs = btrfs_ref_num_refs(leaf, ref); - BUG_ON(num_refs == 0); - num_refs -= 1; + BUG_ON(num_refs < refs_to_drop); + num_refs -= refs_to_drop; if (num_refs == 0) { ret = btrfs_del_item(trans, root, path); } else { @@ -927,332 +687,28 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, #endif } -static noinline int free_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, - struct list_head *del_list) -{ - struct btrfs_fs_info *info = extent_root->fs_info; - struct btrfs_path *path; - struct btrfs_key key, found_key; - struct extent_buffer *leaf; - struct list_head *cur; - struct pending_extent_op *op; - struct btrfs_extent_item *ei; - int ret, num_to_del, extent_slot = 0, found_extent = 0; - u32 refs; - u64 bytes_freed = 0; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - path->reada = 1; - -search: - /* search for the backref for the current ref we want to delete */ - cur = del_list->next; - op = list_entry(cur, struct pending_extent_op, list); - ret = lookup_extent_backref(trans, extent_root, path, op->bytenr, - op->orig_parent, - extent_root->root_key.objectid, - op->orig_generation, op->level, 1); - if (ret) { - printk(KERN_ERR "btrfs unable to find backref byte nr %llu " - "root %llu gen %llu owner %u\n", - (unsigned long long)op->bytenr, - (unsigned long long)extent_root->root_key.objectid, - (unsigned long long)op->orig_generation, op->level); - btrfs_print_leaf(extent_root, path->nodes[0]); - WARN_ON(1); - goto out; - } - - extent_slot = path->slots[0]; - num_to_del = 1; - found_extent = 0; - - /* - * if we aren't the first item on the leaf we can move back one and see - * if our ref is right next to our extent item - */ - if (likely(extent_slot)) { - extent_slot--; - btrfs_item_key_to_cpu(path->nodes[0], &found_key, - extent_slot); - if (found_key.objectid == op->bytenr && - found_key.type == BTRFS_EXTENT_ITEM_KEY && - found_key.offset == op->num_bytes) { - num_to_del++; - found_extent = 1; - } - } - - /* - * if we didn't find the extent we need to delete the backref and then - * search for the extent item key so we can update its ref count - */ - if (!found_extent) { - key.objectid = op->bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = op->num_bytes; - - ret = remove_extent_backref(trans, extent_root, path); - BUG_ON(ret); - btrfs_release_path(extent_root, path); - ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); - BUG_ON(ret); - extent_slot = path->slots[0]; - } - - /* this is where we update the ref count for the extent */ - leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); - refs = btrfs_extent_refs(leaf, ei); - BUG_ON(refs == 0); - refs--; - btrfs_set_extent_refs(leaf, ei, refs); - - btrfs_mark_buffer_dirty(leaf); - - /* - * This extent needs deleting. The reason cur_slot is extent_slot + - * num_to_del is because extent_slot points to the slot where the extent - * is, and if the backref was not right next to the extent we will be - * deleting at least 1 item, and will want to start searching at the - * slot directly next to extent_slot. However if we did find the - * backref next to the extent item them we will be deleting at least 2 - * items and will want to start searching directly after the ref slot - */ - if (!refs) { - struct list_head *pos, *n, *end; - int cur_slot = extent_slot+num_to_del; - u64 super_used; - u64 root_used; - - path->slots[0] = extent_slot; - bytes_freed = op->num_bytes; - - mutex_lock(&info->pinned_mutex); - ret = pin_down_bytes(trans, extent_root, op->bytenr, - op->num_bytes, op->level >= - BTRFS_FIRST_FREE_OBJECTID); - mutex_unlock(&info->pinned_mutex); - BUG_ON(ret < 0); - op->del = ret; - - /* - * we need to see if we can delete multiple things at once, so - * start looping through the list of extents we are wanting to - * delete and see if their extent/backref's are right next to - * eachother and the extents only have 1 ref - */ - for (pos = cur->next; pos != del_list; pos = pos->next) { - struct pending_extent_op *tmp; - - tmp = list_entry(pos, struct pending_extent_op, list); - - /* we only want to delete extent+ref at this stage */ - if (cur_slot >= btrfs_header_nritems(leaf) - 1) - break; - - btrfs_item_key_to_cpu(leaf, &found_key, cur_slot); - if (found_key.objectid != tmp->bytenr || - found_key.type != BTRFS_EXTENT_ITEM_KEY || - found_key.offset != tmp->num_bytes) - break; - - /* check to make sure this extent only has one ref */ - ei = btrfs_item_ptr(leaf, cur_slot, - struct btrfs_extent_item); - if (btrfs_extent_refs(leaf, ei) != 1) - break; - - btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1); - if (found_key.objectid != tmp->bytenr || - found_key.type != BTRFS_EXTENT_REF_KEY || - found_key.offset != tmp->orig_parent) - break; - - /* - * the ref is right next to the extent, we can set the - * ref count to 0 since we will delete them both now - */ - btrfs_set_extent_refs(leaf, ei, 0); - - /* pin down the bytes for this extent */ - mutex_lock(&info->pinned_mutex); - ret = pin_down_bytes(trans, extent_root, tmp->bytenr, - tmp->num_bytes, tmp->level >= - BTRFS_FIRST_FREE_OBJECTID); - mutex_unlock(&info->pinned_mutex); - BUG_ON(ret < 0); - - /* - * use the del field to tell if we need to go ahead and - * free up the extent when we delete the item or not. - */ - tmp->del = ret; - bytes_freed += tmp->num_bytes; - - num_to_del += 2; - cur_slot += 2; - } - end = pos; - - /* update the free space counters */ - spin_lock(&info->delalloc_lock); - super_used = btrfs_super_bytes_used(&info->super_copy); - btrfs_set_super_bytes_used(&info->super_copy, - super_used - bytes_freed); - - root_used = btrfs_root_used(&extent_root->root_item); - btrfs_set_root_used(&extent_root->root_item, - root_used - bytes_freed); - spin_unlock(&info->delalloc_lock); - - /* delete the items */ - ret = btrfs_del_items(trans, extent_root, path, - path->slots[0], num_to_del); - BUG_ON(ret); - - /* - * loop through the extents we deleted and do the cleanup work - * on them - */ - for (pos = cur, n = pos->next; pos != end; - pos = n, n = pos->next) { - struct pending_extent_op *tmp; - tmp = list_entry(pos, struct pending_extent_op, list); - - /* - * remember tmp->del tells us wether or not we pinned - * down the extent - */ - ret = update_block_group(trans, extent_root, - tmp->bytenr, tmp->num_bytes, 0, - tmp->del); - BUG_ON(ret); - - list_del_init(&tmp->list); - unlock_extent(&info->extent_ins, tmp->bytenr, - tmp->bytenr + tmp->num_bytes - 1, - GFP_NOFS); - kfree(tmp); - } - } else if (refs && found_extent) { - /* - * the ref and extent were right next to eachother, but the - * extent still has a ref, so just free the backref and keep - * going - */ - ret = remove_extent_backref(trans, extent_root, path); - BUG_ON(ret); - - list_del_init(&op->list); - unlock_extent(&info->extent_ins, op->bytenr, - op->bytenr + op->num_bytes - 1, GFP_NOFS); - kfree(op); - } else { - /* - * the extent has multiple refs and the backref we were looking - * for was not right next to it, so just unlock and go next, - * we're good to go - */ - list_del_init(&op->list); - unlock_extent(&info->extent_ins, op->bytenr, - op->bytenr + op->num_bytes - 1, GFP_NOFS); - kfree(op); - } - - btrfs_release_path(extent_root, path); - if (!list_empty(del_list)) - goto search; - -out: - btrfs_free_path(path); - return ret; -} - static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u64 orig_parent, u64 parent, u64 orig_root, u64 ref_root, u64 orig_generation, u64 ref_generation, u64 owner_objectid) { int ret; - struct btrfs_root *extent_root = root->fs_info->extent_root; - struct btrfs_path *path; - - if (root == root->fs_info->extent_root) { - struct pending_extent_op *extent_op; - u64 num_bytes; - - BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL); - num_bytes = btrfs_level_size(root, (int)owner_objectid); - mutex_lock(&root->fs_info->extent_ins_mutex); - if (test_range_bit(&root->fs_info->extent_ins, bytenr, - bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { - u64 priv; - ret = get_state_private(&root->fs_info->extent_ins, - bytenr, &priv); - BUG_ON(ret); - extent_op = (struct pending_extent_op *) - (unsigned long)priv; - BUG_ON(extent_op->parent != orig_parent); - BUG_ON(extent_op->generation != orig_generation); + int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID; - extent_op->parent = parent; - extent_op->generation = ref_generation; - } else { - extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); - BUG_ON(!extent_op); - - extent_op->type = PENDING_BACKREF_UPDATE; - extent_op->bytenr = bytenr; - extent_op->num_bytes = num_bytes; - extent_op->parent = parent; - extent_op->orig_parent = orig_parent; - extent_op->generation = ref_generation; - extent_op->orig_generation = orig_generation; - extent_op->level = (int)owner_objectid; - INIT_LIST_HEAD(&extent_op->list); - extent_op->del = 0; - - set_extent_bits(&root->fs_info->extent_ins, - bytenr, bytenr + num_bytes - 1, - EXTENT_WRITEBACK, GFP_NOFS); - set_state_private(&root->fs_info->extent_ins, - bytenr, (unsigned long)extent_op); - } - mutex_unlock(&root->fs_info->extent_ins_mutex); - return 0; - } - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - ret = lookup_extent_backref(trans, extent_root, path, - bytenr, orig_parent, orig_root, - orig_generation, owner_objectid, 1); - if (ret) - goto out; - ret = remove_extent_backref(trans, extent_root, path); - if (ret) - goto out; - ret = insert_extent_backref(trans, extent_root, path, bytenr, - parent, ref_root, ref_generation, - owner_objectid); + ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes, + orig_parent, parent, orig_root, + ref_root, orig_generation, + ref_generation, owner_objectid, pin); BUG_ON(ret); - finish_current_insert(trans, extent_root, 0); - del_pending_extents(trans, extent_root, 0); -out: - btrfs_free_path(path); return ret; } int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, - u64 orig_parent, u64 parent, + u64 num_bytes, u64 orig_parent, u64 parent, u64 ref_root, u64 ref_generation, u64 owner_objectid) { @@ -1260,20 +716,36 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, if (ref_root == BTRFS_TREE_LOG_OBJECTID && owner_objectid < BTRFS_FIRST_FREE_OBJECTID) return 0; - ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, - parent, ref_root, ref_root, - ref_generation, ref_generation, - owner_objectid); + + ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes, + orig_parent, parent, ref_root, + ref_root, ref_generation, + ref_generation, owner_objectid); return ret; } - static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u64 orig_parent, u64 parent, u64 orig_root, u64 ref_root, u64 orig_generation, u64 ref_generation, u64 owner_objectid) { + int ret; + + ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root, + ref_generation, owner_objectid, + BTRFS_ADD_DELAYED_REF, 0); + BUG_ON(ret); + return ret; +} + +static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u64 parent, u64 ref_root, + u64 ref_generation, u64 owner_objectid, + int refs_to_add) +{ struct btrfs_path *path; int ret; struct btrfs_key key; @@ -1286,17 +758,24 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return -ENOMEM; path->reada = 1; + path->leave_spinning = 1; key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = (u64)-1; + key.offset = num_bytes; - ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, - 0, 1); - if (ret < 0) + /* first find the extent item and update its reference count */ + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, + path, 0, 1); + if (ret < 0) { + btrfs_set_path_blocking(path); return ret; - BUG_ON(ret == 0 || path->slots[0] == 0); + } - path->slots[0]--; + if (ret > 0) { + WARN_ON(1); + btrfs_free_path(path); + return -EIO; + } l = path->nodes[0]; btrfs_item_key_to_cpu(l, &key, path->slots[0]); @@ -1310,21 +789,24 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); + refs = btrfs_extent_refs(l, item); - btrfs_set_extent_refs(l, item, refs + 1); + btrfs_set_extent_refs(l, item, refs + refs_to_add); + btrfs_unlock_up_safe(path, 1); + btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root->fs_info->extent_root, path); path->reada = 1; + path->leave_spinning = 1; + + /* now insert the actual backref */ ret = insert_extent_backref(trans, root->fs_info->extent_root, path, bytenr, parent, ref_root, ref_generation, - owner_objectid); + owner_objectid, refs_to_add); BUG_ON(ret); - finish_current_insert(trans, root->fs_info->extent_root, 0); - del_pending_extents(trans, root->fs_info->extent_root, 0); - btrfs_free_path(path); return 0; } @@ -1339,68 +821,278 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, if (ref_root == BTRFS_TREE_LOG_OBJECTID && owner_objectid < BTRFS_FIRST_FREE_OBJECTID) return 0; - ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, + + ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent, 0, ref_root, 0, ref_generation, owner_objectid); return ret; } -int btrfs_extent_post_op(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static int drop_delayed_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_delayed_ref_node *node) +{ + int ret = 0; + struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node); + + BUG_ON(node->ref_mod == 0); + ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes, + node->parent, ref->root, ref->generation, + ref->owner_objectid, ref->pin, node->ref_mod); + + return ret; +} + +/* helper function to actually process a single delayed ref entry */ +static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_delayed_ref_node *node, + int insert_reserved) { - u64 start; - u64 end; int ret; + struct btrfs_delayed_ref *ref; + + if (node->parent == (u64)-1) { + struct btrfs_delayed_ref_head *head; + /* + * we've hit the end of the chain and we were supposed + * to insert this extent into the tree. But, it got + * deleted before we ever needed to insert it, so all + * we have to do is clean up the accounting + */ + if (insert_reserved) { + update_reserved_extents(root, node->bytenr, + node->num_bytes, 0); + } + head = btrfs_delayed_node_to_head(node); + mutex_unlock(&head->mutex); + return 0; + } - while(1) { - finish_current_insert(trans, root->fs_info->extent_root, 1); - del_pending_extents(trans, root->fs_info->extent_root, 1); + ref = btrfs_delayed_node_to_ref(node); + if (ref->action == BTRFS_ADD_DELAYED_REF) { + if (insert_reserved) { + struct btrfs_key ins; - /* is there more work to do? */ - ret = find_first_extent_bit(&root->fs_info->pending_del, - 0, &start, &end, EXTENT_WRITEBACK); - if (!ret) - continue; - ret = find_first_extent_bit(&root->fs_info->extent_ins, - 0, &start, &end, EXTENT_WRITEBACK); - if (!ret) - continue; - break; + ins.objectid = node->bytenr; + ins.offset = node->num_bytes; + ins.type = BTRFS_EXTENT_ITEM_KEY; + + /* record the full extent allocation */ + ret = __btrfs_alloc_reserved_extent(trans, root, + node->parent, ref->root, + ref->generation, ref->owner_objectid, + &ins, node->ref_mod); + update_reserved_extents(root, node->bytenr, + node->num_bytes, 0); + } else { + /* just add one backref */ + ret = add_extent_ref(trans, root, node->bytenr, + node->num_bytes, + node->parent, ref->root, ref->generation, + ref->owner_objectid, node->ref_mod); + } + BUG_ON(ret); + } else if (ref->action == BTRFS_DROP_DELAYED_REF) { + WARN_ON(insert_reserved); + ret = drop_delayed_ref(trans, root, node); } return 0; } -int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u32 *refs) +static noinline struct btrfs_delayed_ref_node * +select_delayed_ref(struct btrfs_delayed_ref_head *head) { - struct btrfs_path *path; + struct rb_node *node; + struct btrfs_delayed_ref_node *ref; + int action = BTRFS_ADD_DELAYED_REF; +again: + /* + * select delayed ref of type BTRFS_ADD_DELAYED_REF first. + * this prevents ref count from going down to zero when + * there still are pending delayed ref. + */ + node = rb_prev(&head->node.rb_node); + while (1) { + if (!node) + break; + ref = rb_entry(node, struct btrfs_delayed_ref_node, + rb_node); + if (ref->bytenr != head->node.bytenr) + break; + if (btrfs_delayed_node_to_ref(ref)->action == action) + return ref; + node = rb_prev(node); + } + if (action == BTRFS_ADD_DELAYED_REF) { + action = BTRFS_DROP_DELAYED_REF; + goto again; + } + return NULL; +} + +static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct list_head *cluster) +{ + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_delayed_ref_node *ref; + struct btrfs_delayed_ref_head *locked_ref = NULL; int ret; - struct btrfs_key key; - struct extent_buffer *l; - struct btrfs_extent_item *item; + int count = 0; + int must_insert_reserved = 0; - WARN_ON(num_bytes < root->sectorsize); - path = btrfs_alloc_path(); - path->reada = 1; - key.objectid = bytenr; - key.offset = num_bytes; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, - 0, 0); - if (ret < 0) - goto out; - if (ret != 0) { - btrfs_print_leaf(root, path->nodes[0]); - printk(KERN_INFO "btrfs failed to find block number %llu\n", - (unsigned long long)bytenr); - BUG(); + delayed_refs = &trans->transaction->delayed_refs; + while (1) { + if (!locked_ref) { + /* pick a new head ref from the cluster list */ + if (list_empty(cluster)) + break; + + locked_ref = list_entry(cluster->next, + struct btrfs_delayed_ref_head, cluster); + + /* grab the lock that says we are going to process + * all the refs for this head */ + ret = btrfs_delayed_ref_lock(trans, locked_ref); + + /* + * we may have dropped the spin lock to get the head + * mutex lock, and that might have given someone else + * time to free the head. If that's true, it has been + * removed from our list and we can move on. + */ + if (ret == -EAGAIN) { + locked_ref = NULL; + count++; + continue; + } + } + + /* + * record the must insert reserved flag before we + * drop the spin lock. + */ + must_insert_reserved = locked_ref->must_insert_reserved; + locked_ref->must_insert_reserved = 0; + + /* + * locked_ref is the head node, so we have to go one + * node back for any delayed ref updates + */ + ref = select_delayed_ref(locked_ref); + if (!ref) { + /* All delayed refs have been processed, Go ahead + * and send the head node to run_one_delayed_ref, + * so that any accounting fixes can happen + */ + ref = &locked_ref->node; + list_del_init(&locked_ref->cluster); + locked_ref = NULL; + } + + ref->in_tree = 0; + rb_erase(&ref->rb_node, &delayed_refs->root); + delayed_refs->num_entries--; + spin_unlock(&delayed_refs->lock); + + ret = run_one_delayed_ref(trans, root, ref, + must_insert_reserved); + BUG_ON(ret); + btrfs_put_delayed_ref(ref); + + count++; + cond_resched(); + spin_lock(&delayed_refs->lock); + } + return count; +} + +/* + * this starts processing the delayed reference count updates and + * extent insertions we have queued up so far. count can be + * 0, which means to process everything in the tree at the start + * of the run (but not newly added entries), or it can be some target + * number you'd like to process. + */ +int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, unsigned long count) +{ + struct rb_node *node; + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_delayed_ref_node *ref; + struct list_head cluster; + int ret; + int run_all = count == (unsigned long)-1; + int run_most = 0; + + if (root == root->fs_info->extent_root) + root = root->fs_info->tree_root; + + delayed_refs = &trans->transaction->delayed_refs; + INIT_LIST_HEAD(&cluster); +again: + spin_lock(&delayed_refs->lock); + if (count == 0) { + count = delayed_refs->num_entries * 2; + run_most = 1; + } + while (1) { + if (!(run_all || run_most) && + delayed_refs->num_heads_ready < 64) + break; + + /* + * go find something we can process in the rbtree. We start at + * the beginning of the tree, and then build a cluster + * of refs to process starting at the first one we are able to + * lock + */ + ret = btrfs_find_ref_cluster(trans, &cluster, + delayed_refs->run_delayed_start); + if (ret) + break; + + ret = run_clustered_refs(trans, root, &cluster); + BUG_ON(ret < 0); + + count -= min_t(unsigned long, ret, count); + + if (count == 0) + break; + } + + if (run_all) { + node = rb_first(&delayed_refs->root); + if (!node) + goto out; + count = (unsigned long)-1; + + while (node) { + ref = rb_entry(node, struct btrfs_delayed_ref_node, + rb_node); + if (btrfs_delayed_ref_is_head(ref)) { + struct btrfs_delayed_ref_head *head; + + head = btrfs_delayed_node_to_head(ref); + atomic_inc(&ref->refs); + + spin_unlock(&delayed_refs->lock); + mutex_lock(&head->mutex); + mutex_unlock(&head->mutex); + + btrfs_put_delayed_ref(ref); + cond_resched(); + goto again; + } + node = rb_next(node); + } + spin_unlock(&delayed_refs->lock); + schedule_timeout(1); + goto again; } - l = path->nodes[0]; - item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - *refs = btrfs_extent_refs(l, item); out: - btrfs_free_path(path); + spin_unlock(&delayed_refs->lock); return 0; } @@ -1624,7 +1316,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, int refi = 0; int slot; int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, - u64, u64, u64, u64, u64, u64, u64, u64); + u64, u64, u64, u64, u64, u64, u64, u64, u64); ref_root = btrfs_header_owner(buf); ref_generation = btrfs_header_generation(buf); @@ -1696,12 +1388,19 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, if (level == 0) { btrfs_item_key_to_cpu(buf, &key, slot); + fi = btrfs_item_ptr(buf, slot, + struct btrfs_file_extent_item); + + bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (bytenr == 0) + continue; ret = process_func(trans, root, bytenr, - orig_buf->start, buf->start, - orig_root, ref_root, - orig_generation, ref_generation, - key.objectid); + btrfs_file_extent_disk_num_bytes(buf, fi), + orig_buf->start, buf->start, + orig_root, ref_root, + orig_generation, ref_generation, + key.objectid); if (ret) { faili = slot; @@ -1709,7 +1408,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, goto fail; } } else { - ret = process_func(trans, root, bytenr, + ret = process_func(trans, root, bytenr, buf->len, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, @@ -1786,17 +1485,17 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans, if (bytenr == 0) continue; ret = __btrfs_update_extent_ref(trans, root, bytenr, - orig_buf->start, buf->start, - orig_root, ref_root, - orig_generation, ref_generation, - key.objectid); + btrfs_file_extent_disk_num_bytes(buf, fi), + orig_buf->start, buf->start, + orig_root, ref_root, orig_generation, + ref_generation, key.objectid); if (ret) goto fail; } else { bytenr = btrfs_node_blockptr(buf, slot); ret = __btrfs_update_extent_ref(trans, root, bytenr, - orig_buf->start, buf->start, - orig_root, ref_root, + buf->len, orig_buf->start, + buf->start, orig_root, ref_root, orig_generation, ref_generation, level - 1); if (ret) @@ -1815,7 +1514,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *cache) { int ret; - int pending_ret; struct btrfs_root *extent_root = root->fs_info->extent_root; unsigned long bi; struct extent_buffer *leaf; @@ -1831,12 +1529,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_release_path(extent_root, path); fail: - finish_current_insert(trans, extent_root, 0); - pending_ret = del_pending_extents(trans, extent_root, 0); if (ret) return ret; - if (pending_ret) - return pending_ret; return 0; } @@ -2361,6 +2055,8 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, clear_extent_dirty(&fs_info->pinned_extents, bytenr, bytenr + num - 1, GFP_NOFS); } + mutex_unlock(&root->fs_info->pinned_mutex); + while (num > 0) { cache = btrfs_lookup_block_group(fs_info, bytenr); BUG_ON(!cache); @@ -2452,8 +2148,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, u64 end; int ret; - mutex_lock(&root->fs_info->pinned_mutex); while (1) { + mutex_lock(&root->fs_info->pinned_mutex); ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY); if (ret) @@ -2461,209 +2157,21 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, ret = btrfs_discard_extent(root, start, end + 1 - start); + /* unlocks the pinned mutex */ btrfs_update_pinned_extents(root, start, end + 1 - start, 0); clear_extent_dirty(unpin, start, end, GFP_NOFS); - if (need_resched()) { - mutex_unlock(&root->fs_info->pinned_mutex); - cond_resched(); - mutex_lock(&root->fs_info->pinned_mutex); - } + cond_resched(); } mutex_unlock(&root->fs_info->pinned_mutex); return ret; } -static int finish_current_insert(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, int all) -{ - u64 start; - u64 end; - u64 priv; - u64 search = 0; - struct btrfs_fs_info *info = extent_root->fs_info; - struct btrfs_path *path; - struct pending_extent_op *extent_op, *tmp; - struct list_head insert_list, update_list; - int ret; - int num_inserts = 0, max_inserts, restart = 0; - - path = btrfs_alloc_path(); - INIT_LIST_HEAD(&insert_list); - INIT_LIST_HEAD(&update_list); - - max_inserts = extent_root->leafsize / - (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) + - sizeof(struct btrfs_extent_ref) + - sizeof(struct btrfs_extent_item)); -again: - mutex_lock(&info->extent_ins_mutex); - while (1) { - ret = find_first_extent_bit(&info->extent_ins, search, &start, - &end, EXTENT_WRITEBACK); - if (ret) { - if (restart && !num_inserts && - list_empty(&update_list)) { - restart = 0; - search = 0; - continue; - } - break; - } - - ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); - if (!ret) { - if (all) - restart = 1; - search = end + 1; - if (need_resched()) { - mutex_unlock(&info->extent_ins_mutex); - cond_resched(); - mutex_lock(&info->extent_ins_mutex); - } - continue; - } - - ret = get_state_private(&info->extent_ins, start, &priv); - BUG_ON(ret); - extent_op = (struct pending_extent_op *)(unsigned long) priv; - - if (extent_op->type == PENDING_EXTENT_INSERT) { - num_inserts++; - list_add_tail(&extent_op->list, &insert_list); - search = end + 1; - if (num_inserts == max_inserts) { - restart = 1; - break; - } - } else if (extent_op->type == PENDING_BACKREF_UPDATE) { - list_add_tail(&extent_op->list, &update_list); - search = end + 1; - } else { - BUG(); - } - } - - /* - * process the update list, clear the writeback bit for it, and if - * somebody marked this thing for deletion then just unlock it and be - * done, the free_extents will handle it - */ - list_for_each_entry_safe(extent_op, tmp, &update_list, list) { - clear_extent_bits(&info->extent_ins, extent_op->bytenr, - extent_op->bytenr + extent_op->num_bytes - 1, - EXTENT_WRITEBACK, GFP_NOFS); - if (extent_op->del) { - list_del_init(&extent_op->list); - unlock_extent(&info->extent_ins, extent_op->bytenr, - extent_op->bytenr + extent_op->num_bytes - - 1, GFP_NOFS); - kfree(extent_op); - } - } - mutex_unlock(&info->extent_ins_mutex); - - /* - * still have things left on the update list, go ahead an update - * everything - */ - if (!list_empty(&update_list)) { - ret = update_backrefs(trans, extent_root, path, &update_list); - BUG_ON(ret); - - /* we may have COW'ed new blocks, so lets start over */ - if (all) - restart = 1; - } - - /* - * if no inserts need to be done, but we skipped some extents and we - * need to make sure everything is cleaned then reset everything and - * go back to the beginning - */ - if (!num_inserts && restart) { - search = 0; - restart = 0; - INIT_LIST_HEAD(&update_list); - INIT_LIST_HEAD(&insert_list); - goto again; - } else if (!num_inserts) { - goto out; - } - - /* - * process the insert extents list. Again if we are deleting this - * extent, then just unlock it, pin down the bytes if need be, and be - * done with it. Saves us from having to actually insert the extent - * into the tree and then subsequently come along and delete it - */ - mutex_lock(&info->extent_ins_mutex); - list_for_each_entry_safe(extent_op, tmp, &insert_list, list) { - clear_extent_bits(&info->extent_ins, extent_op->bytenr, - extent_op->bytenr + extent_op->num_bytes - 1, - EXTENT_WRITEBACK, GFP_NOFS); - if (extent_op->del) { - u64 used; - list_del_init(&extent_op->list); - unlock_extent(&info->extent_ins, extent_op->bytenr, - extent_op->bytenr + extent_op->num_bytes - - 1, GFP_NOFS); - - mutex_lock(&extent_root->fs_info->pinned_mutex); - ret = pin_down_bytes(trans, extent_root, - extent_op->bytenr, - extent_op->num_bytes, 0); - mutex_unlock(&extent_root->fs_info->pinned_mutex); - - spin_lock(&info->delalloc_lock); - used = btrfs_super_bytes_used(&info->super_copy); - btrfs_set_super_bytes_used(&info->super_copy, - used - extent_op->num_bytes); - used = btrfs_root_used(&extent_root->root_item); - btrfs_set_root_used(&extent_root->root_item, - used - extent_op->num_bytes); - spin_unlock(&info->delalloc_lock); - - ret = update_block_group(trans, extent_root, - extent_op->bytenr, - extent_op->num_bytes, - 0, ret > 0); - BUG_ON(ret); - kfree(extent_op); - num_inserts--; - } - } - mutex_unlock(&info->extent_ins_mutex); - - ret = insert_extents(trans, extent_root, path, &insert_list, - num_inserts); - BUG_ON(ret); - - /* - * if restart is set for whatever reason we need to go back and start - * searching through the pending list again. - * - * We just inserted some extents, which could have resulted in new - * blocks being allocated, which would result in new blocks needing - * updates, so if all is set we _must_ restart to get the updated - * blocks. - */ - if (restart || all) { - INIT_LIST_HEAD(&insert_list); - INIT_LIST_HEAD(&update_list); - search = 0; - restart = 0; - num_inserts = 0; - goto again; - } -out: - btrfs_free_path(path); - return 0; -} - static int pin_down_bytes(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u64 num_bytes, int is_data) + struct btrfs_path *path, + u64 bytenr, u64 num_bytes, int is_data, + struct extent_buffer **must_clean) { int err = 0; struct extent_buffer *buf; @@ -2686,17 +2194,19 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, u64 header_transid = btrfs_header_generation(buf); if (header_owner != BTRFS_TREE_LOG_OBJECTID && header_owner != BTRFS_TREE_RELOC_OBJECTID && + header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID && header_transid == trans->transid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { - clean_tree_block(NULL, root, buf); - btrfs_tree_unlock(buf); - free_extent_buffer(buf); + *must_clean = buf; return 1; } btrfs_tree_unlock(buf); } free_extent_buffer(buf); pinit: + btrfs_set_path_blocking(path); + mutex_lock(&root->fs_info->pinned_mutex); + /* unlocks the pinned mutex */ btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); BUG_ON(err < 0); @@ -2710,7 +2220,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner_objectid, int pin, int mark_free) + u64 owner_objectid, int pin, int mark_free, + int refs_to_drop) { struct btrfs_path *path; struct btrfs_key key; @@ -2732,6 +2243,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, return -ENOMEM; path->reada = 1; + path->leave_spinning = 1; ret = lookup_extent_backref(trans, extent_root, path, bytenr, parent, root_objectid, ref_generation, owner_objectid, 1); @@ -2753,9 +2265,11 @@ static int __free_extent(struct btrfs_trans_handle *trans, break; } if (!found_extent) { - ret = remove_extent_backref(trans, extent_root, path); + ret = remove_extent_backref(trans, extent_root, path, + refs_to_drop); BUG_ON(ret); btrfs_release_path(extent_root, path); + path->leave_spinning = 1; ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret) { @@ -2771,8 +2285,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, btrfs_print_leaf(extent_root, path->nodes[0]); WARN_ON(1); printk(KERN_ERR "btrfs unable to find ref byte nr %llu " - "root %llu gen %llu owner %llu\n", + "parent %llu root %llu gen %llu owner %llu\n", (unsigned long long)bytenr, + (unsigned long long)parent, (unsigned long long)root_objectid, (unsigned long long)ref_generation, (unsigned long long)owner_objectid); @@ -2782,17 +2297,23 @@ static int __free_extent(struct btrfs_trans_handle *trans, ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, ei); - BUG_ON(refs == 0); - refs -= 1; - btrfs_set_extent_refs(leaf, ei, refs); + /* + * we're not allowed to delete the extent item if there + * are other delayed ref updates pending + */ + + BUG_ON(refs < refs_to_drop); + refs -= refs_to_drop; + btrfs_set_extent_refs(leaf, ei, refs); btrfs_mark_buffer_dirty(leaf); - if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { + if (refs == 0 && found_extent && + path->slots[0] == extent_slot + 1) { struct btrfs_extent_ref *ref; ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); - BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); + BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop); /* if the back ref and the extent are next to each other * they get deleted below in one shot */ @@ -2800,11 +2321,13 @@ static int __free_extent(struct btrfs_trans_handle *trans, num_to_del = 2; } else if (found_extent) { /* otherwise delete the extent back ref */ - ret = remove_extent_backref(trans, extent_root, path); + ret = remove_extent_backref(trans, extent_root, path, + refs_to_drop); BUG_ON(ret); /* if refs are 0, we need to setup the path for deletion */ if (refs == 0) { btrfs_release_path(extent_root, path); + path->leave_spinning = 1; ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); BUG_ON(ret); @@ -2814,16 +2337,18 @@ static int __free_extent(struct btrfs_trans_handle *trans, if (refs == 0) { u64 super_used; u64 root_used; + struct extent_buffer *must_clean = NULL; if (pin) { - mutex_lock(&root->fs_info->pinned_mutex); - ret = pin_down_bytes(trans, root, bytenr, num_bytes, - owner_objectid >= BTRFS_FIRST_FREE_OBJECTID); - mutex_unlock(&root->fs_info->pinned_mutex); + ret = pin_down_bytes(trans, root, path, + bytenr, num_bytes, + owner_objectid >= BTRFS_FIRST_FREE_OBJECTID, + &must_clean); if (ret > 0) mark_free = 1; BUG_ON(ret < 0); } + /* block accounting for super block */ spin_lock(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); @@ -2835,14 +2360,34 @@ static int __free_extent(struct btrfs_trans_handle *trans, btrfs_set_root_used(&root->root_item, root_used - num_bytes); spin_unlock(&info->delalloc_lock); + + /* + * it is going to be very rare for someone to be waiting + * on the block we're freeing. del_items might need to + * schedule, so rather than get fancy, just force it + * to blocking here + */ + if (must_clean) + btrfs_set_lock_blocking(must_clean); + ret = btrfs_del_items(trans, extent_root, path, path->slots[0], num_to_del); BUG_ON(ret); btrfs_release_path(extent_root, path); + if (must_clean) { + clean_tree_block(NULL, root, must_clean); + btrfs_tree_unlock(must_clean); + free_extent_buffer(must_clean); + } + if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { ret = btrfs_del_csums(trans, root, bytenr, num_bytes); BUG_ON(ret); + } else { + invalidate_mapping_pages(info->btree_inode->i_mapping, + bytenr >> PAGE_CACHE_SHIFT, + (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); } ret = update_block_group(trans, root, bytenr, num_bytes, 0, @@ -2850,218 +2395,103 @@ static int __free_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); } btrfs_free_path(path); - finish_current_insert(trans, extent_root, 0); return ret; } /* - * find all the blocks marked as pending in the radix tree and remove - * them from the extent map + * remove an extent from the root, returns 0 on success */ -static int del_pending_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, int all) +static int __btrfs_free_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 ref_generation, + u64 owner_objectid, int pin, + int refs_to_drop) { - int ret; - int err = 0; - u64 start; - u64 end; - u64 priv; - u64 search = 0; - int nr = 0, skipped = 0; - struct extent_io_tree *pending_del; - struct extent_io_tree *extent_ins; - struct pending_extent_op *extent_op; - struct btrfs_fs_info *info = extent_root->fs_info; - struct list_head delete_list; - - INIT_LIST_HEAD(&delete_list); - extent_ins = &extent_root->fs_info->extent_ins; - pending_del = &extent_root->fs_info->pending_del; - -again: - mutex_lock(&info->extent_ins_mutex); - while (1) { - ret = find_first_extent_bit(pending_del, search, &start, &end, - EXTENT_WRITEBACK); - if (ret) { - if (all && skipped && !nr) { - search = 0; - skipped = 0; - continue; - } - mutex_unlock(&info->extent_ins_mutex); - break; - } - - ret = try_lock_extent(extent_ins, start, end, GFP_NOFS); - if (!ret) { - search = end+1; - skipped = 1; - - if (need_resched()) { - mutex_unlock(&info->extent_ins_mutex); - cond_resched(); - mutex_lock(&info->extent_ins_mutex); - } - - continue; - } - BUG_ON(ret < 0); - - ret = get_state_private(pending_del, start, &priv); - BUG_ON(ret); - extent_op = (struct pending_extent_op *)(unsigned long)priv; - - clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK, - GFP_NOFS); - if (!test_range_bit(extent_ins, start, end, - EXTENT_WRITEBACK, 0)) { - list_add_tail(&extent_op->list, &delete_list); - nr++; - } else { - kfree(extent_op); - - ret = get_state_private(&info->extent_ins, start, - &priv); - BUG_ON(ret); - extent_op = (struct pending_extent_op *) - (unsigned long)priv; - - clear_extent_bits(&info->extent_ins, start, end, - EXTENT_WRITEBACK, GFP_NOFS); - - if (extent_op->type == PENDING_BACKREF_UPDATE) { - list_add_tail(&extent_op->list, &delete_list); - search = end + 1; - nr++; - continue; - } - - mutex_lock(&extent_root->fs_info->pinned_mutex); - ret = pin_down_bytes(trans, extent_root, start, - end + 1 - start, 0); - mutex_unlock(&extent_root->fs_info->pinned_mutex); - - ret = update_block_group(trans, extent_root, start, - end + 1 - start, 0, ret > 0); - - unlock_extent(extent_ins, start, end, GFP_NOFS); - BUG_ON(ret); - kfree(extent_op); - } - if (ret) - err = ret; - - search = end + 1; - - if (need_resched()) { - mutex_unlock(&info->extent_ins_mutex); - cond_resched(); - mutex_lock(&info->extent_ins_mutex); - } - } + WARN_ON(num_bytes < root->sectorsize); - if (nr) { - ret = free_extents(trans, extent_root, &delete_list); - BUG_ON(ret); - } + /* + * if metadata always pin + * if data pin when any transaction has committed this + */ + if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID || + ref_generation != trans->transid) + pin = 1; - if (all && skipped) { - INIT_LIST_HEAD(&delete_list); - search = 0; - nr = 0; - goto again; - } + if (ref_generation != trans->transid) + pin = 1; - if (!err) - finish_current_insert(trans, extent_root, 0); - return err; + return __free_extent(trans, root, bytenr, num_bytes, parent, + root_objectid, ref_generation, + owner_objectid, pin, pin == 0, refs_to_drop); } /* - * remove an extent from the root, returns 0 on success + * when we free an extent, it is possible (and likely) that we free the last + * delayed ref for that extent as well. This searches the delayed ref tree for + * a given extent, and if there are no other delayed refs to be processed, it + * removes it from the tree. */ -static int __btrfs_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 parent, - u64 root_objectid, u64 ref_generation, - u64 owner_objectid, int pin) +static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr) { - struct btrfs_root *extent_root = root->fs_info->extent_root; - int pending_ret; + struct btrfs_delayed_ref_head *head; + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_delayed_ref_node *ref; + struct rb_node *node; int ret; - WARN_ON(num_bytes < root->sectorsize); - if (root == extent_root) { - struct pending_extent_op *extent_op = NULL; - - mutex_lock(&root->fs_info->extent_ins_mutex); - if (test_range_bit(&root->fs_info->extent_ins, bytenr, - bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { - u64 priv; - ret = get_state_private(&root->fs_info->extent_ins, - bytenr, &priv); - BUG_ON(ret); - extent_op = (struct pending_extent_op *) - (unsigned long)priv; + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + head = btrfs_find_delayed_ref_head(trans, bytenr); + if (!head) + goto out; - extent_op->del = 1; - if (extent_op->type == PENDING_EXTENT_INSERT) { - mutex_unlock(&root->fs_info->extent_ins_mutex); - return 0; - } - } + node = rb_prev(&head->node.rb_node); + if (!node) + goto out; - if (extent_op) { - ref_generation = extent_op->orig_generation; - parent = extent_op->orig_parent; - } + ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); - BUG_ON(!extent_op); - - extent_op->type = PENDING_EXTENT_DELETE; - extent_op->bytenr = bytenr; - extent_op->num_bytes = num_bytes; - extent_op->parent = parent; - extent_op->orig_parent = parent; - extent_op->generation = ref_generation; - extent_op->orig_generation = ref_generation; - extent_op->level = (int)owner_objectid; - INIT_LIST_HEAD(&extent_op->list); - extent_op->del = 0; - - set_extent_bits(&root->fs_info->pending_del, - bytenr, bytenr + num_bytes - 1, - EXTENT_WRITEBACK, GFP_NOFS); - set_state_private(&root->fs_info->pending_del, - bytenr, (unsigned long)extent_op); - mutex_unlock(&root->fs_info->extent_ins_mutex); - return 0; - } - /* if metadata always pin */ - if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { - if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { - mutex_lock(&root->fs_info->pinned_mutex); - btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); - mutex_unlock(&root->fs_info->pinned_mutex); - update_reserved_extents(root, bytenr, num_bytes, 0); - return 0; - } - pin = 1; - } + /* there are still entries for this ref, we can't drop it */ + if (ref->bytenr == bytenr) + goto out; - /* if data pin when any transaction has committed this */ - if (ref_generation != trans->transid) - pin = 1; + /* + * waiting for the lock here would deadlock. If someone else has it + * locked they are already in the process of dropping it anyway + */ + if (!mutex_trylock(&head->mutex)) + goto out; - ret = __free_extent(trans, root, bytenr, num_bytes, parent, - root_objectid, ref_generation, - owner_objectid, pin, pin == 0); + /* + * at this point we have a head with no other entries. Go + * ahead and process it. + */ + head->node.in_tree = 0; + rb_erase(&head->node.rb_node, &delayed_refs->root); - finish_current_insert(trans, root->fs_info->extent_root, 0); - pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0); - return ret ? ret : pending_ret; + delayed_refs->num_entries--; + + /* + * we don't take a ref on the node because we're removing it from the + * tree, so we just steal the ref the tree was holding. + */ + delayed_refs->num_heads--; + if (list_empty(&head->cluster)) + delayed_refs->num_heads_ready--; + + list_del_init(&head->cluster); + spin_unlock(&delayed_refs->lock); + + ret = run_one_delayed_ref(trans, root->fs_info->tree_root, + &head->node, head->must_insert_reserved); + BUG_ON(ret); + btrfs_put_delayed_ref(&head->node); + return 0; +out: + spin_unlock(&delayed_refs->lock); + return 0; } int btrfs_free_extent(struct btrfs_trans_handle *trans, @@ -3072,9 +2502,30 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, { int ret; - ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, - root_objectid, ref_generation, - owner_objectid, pin); + /* + * tree log blocks never actually go into the extent allocation + * tree, just update pinning info and exit early. + * + * data extents referenced by the tree log do need to have + * their reference counts bumped. + */ + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID && + owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { + mutex_lock(&root->fs_info->pinned_mutex); + + /* unlocks the pinned mutex */ + btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); + update_reserved_extents(root, bytenr, num_bytes, 0); + ret = 0; + } else { + ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, + root_objectid, ref_generation, + owner_objectid, + BTRFS_DROP_DELAYED_REF, 1); + BUG_ON(ret); + ret = check_ref_cleanup(trans, root, bytenr); + BUG_ON(ret); + } return ret; } @@ -3475,10 +2926,10 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 ref_generation, - u64 owner, struct btrfs_key *ins) + u64 owner, struct btrfs_key *ins, + int ref_mod) { int ret; - int pending_ret; u64 super_used; u64 root_used; u64 num_bytes = ins->offset; @@ -3503,33 +2954,6 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, btrfs_set_root_used(&root->root_item, root_used + num_bytes); spin_unlock(&info->delalloc_lock); - if (root == extent_root) { - struct pending_extent_op *extent_op; - - extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); - BUG_ON(!extent_op); - - extent_op->type = PENDING_EXTENT_INSERT; - extent_op->bytenr = ins->objectid; - extent_op->num_bytes = ins->offset; - extent_op->parent = parent; - extent_op->orig_parent = 0; - extent_op->generation = ref_generation; - extent_op->orig_generation = 0; - extent_op->level = (int)owner; - INIT_LIST_HEAD(&extent_op->list); - extent_op->del = 0; - - mutex_lock(&root->fs_info->extent_ins_mutex); - set_extent_bits(&root->fs_info->extent_ins, ins->objectid, - ins->objectid + ins->offset - 1, - EXTENT_WRITEBACK, GFP_NOFS); - set_state_private(&root->fs_info->extent_ins, - ins->objectid, (unsigned long)extent_op); - mutex_unlock(&root->fs_info->extent_ins_mutex); - goto update_block; - } - memcpy(&keys[0], ins, sizeof(*ins)); keys[1].objectid = ins->objectid; keys[1].type = BTRFS_EXTENT_REF_KEY; @@ -3540,37 +2964,31 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); + path->leave_spinning = 1; ret = btrfs_insert_empty_items(trans, extent_root, path, keys, sizes, 2); BUG_ON(ret); extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_extent_item); - btrfs_set_extent_refs(path->nodes[0], extent_item, 1); + btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod); ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, struct btrfs_extent_ref); btrfs_set_ref_root(path->nodes[0], ref, root_objectid); btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); btrfs_set_ref_objectid(path->nodes[0], ref, owner); - btrfs_set_ref_num_refs(path->nodes[0], ref, 1); + btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod); btrfs_mark_buffer_dirty(path->nodes[0]); trans->alloc_exclude_start = 0; trans->alloc_exclude_nr = 0; btrfs_free_path(path); - finish_current_insert(trans, extent_root, 0); - pending_ret = del_pending_extents(trans, extent_root, 0); if (ret) goto out; - if (pending_ret) { - ret = pending_ret; - goto out; - } -update_block: ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); if (ret) { @@ -3592,9 +3010,12 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, if (root_objectid == BTRFS_TREE_LOG_OBJECTID) return 0; - ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, - ref_generation, owner, ins); - update_reserved_extents(root, ins->objectid, ins->offset, 0); + + ret = btrfs_add_delayed_ref(trans, ins->objectid, + ins->offset, parent, root_objectid, + ref_generation, owner, + BTRFS_ADD_DELAYED_EXTENT, 0); + BUG_ON(ret); return ret; } @@ -3621,7 +3042,7 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); put_block_group(block_group); ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, - ref_generation, owner, ins); + ref_generation, owner, ins, 1); return ret; } @@ -3640,20 +3061,18 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 search_end, struct btrfs_key *ins, u64 data) { int ret; - ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, empty_size, hint_byte, search_end, ins, data); BUG_ON(ret); if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { - ret = __btrfs_alloc_reserved_extent(trans, root, parent, - root_objectid, ref_generation, - owner_objectid, ins); + ret = btrfs_add_delayed_ref(trans, ins->objectid, + ins->offset, parent, root_objectid, + ref_generation, owner_objectid, + BTRFS_ADD_DELAYED_EXTENT, 0); BUG_ON(ret); - - } else { - update_reserved_extents(root, ins->objectid, ins->offset, 1); } + update_reserved_extents(root, ins->objectid, ins->offset, 1); return ret; } @@ -3789,7 +3208,7 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - ret = __btrfs_free_extent(trans, root, disk_bytenr, + ret = btrfs_free_extent(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(leaf, fi), leaf->start, leaf_owner, leaf_generation, key.objectid, 0); @@ -3829,7 +3248,7 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, */ for (i = 0; i < ref->nritems; i++) { info = ref->extents + sorted[i].slot; - ret = __btrfs_free_extent(trans, root, info->bytenr, + ret = btrfs_free_extent(trans, root, info->bytenr, info->num_bytes, ref->bytenr, ref->owner, ref->generation, info->objectid, 0); @@ -3846,12 +3265,13 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } -static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, +static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 start, u64 len, u32 *refs) { int ret; - ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); + ret = btrfs_lookup_extent_ref(trans, root, start, len, refs); BUG_ON(ret); #if 0 /* some debugging code in case we see problems here */ @@ -3959,7 +3379,8 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, * we just decrement it below and don't update any * of the refs the leaf points to. */ - ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); + ret = drop_snap_lookup_refcount(trans, root, bytenr, + blocksize, &refs); BUG_ON(ret); if (refs != 1) continue; @@ -4010,7 +3431,7 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, */ for (i = 0; i < refi; i++) { bytenr = sorted[i].bytenr; - ret = __btrfs_free_extent(trans, root, bytenr, + ret = btrfs_free_extent(trans, root, bytenr, blocksize, eb->start, root_owner, root_gen, 0, 1); BUG_ON(ret); @@ -4053,7 +3474,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, + ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, path->nodes[*level]->len, &refs); BUG_ON(ret); if (refs > 1) @@ -4104,7 +3525,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); blocksize = btrfs_level_size(root, *level - 1); - ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); + ret = drop_snap_lookup_refcount(trans, root, bytenr, + blocksize, &refs); BUG_ON(ret); /* @@ -4119,7 +3541,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, root_gen = btrfs_header_generation(parent); path->slots[*level]++; - ret = __btrfs_free_extent(trans, root, bytenr, + ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, root_owner, root_gen, *level - 1, 1); @@ -4165,7 +3587,7 @@ out: * cleanup and free the reference on the last node * we processed */ - ret = __btrfs_free_extent(trans, root, bytenr, blocksize, + ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, root_owner, root_gen, *level, 1); free_extent_buffer(path->nodes[*level]); @@ -4354,6 +3776,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_path *path; int i; int orig_level; + int update_count; struct btrfs_root_item *root_item = &root->root_item; WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); @@ -4395,6 +3818,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root } } while (1) { + unsigned long update; wret = walk_down_tree(trans, root, path, &level); if (wret > 0) break; @@ -4407,12 +3831,21 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; - if (trans->transaction->in_commit) { + if (trans->transaction->in_commit || + trans->transaction->delayed_refs.flushing) { ret = -EAGAIN; break; } atomic_inc(&root->fs_info->throttle_gen); wake_up(&root->fs_info->transaction_throttle); + for (update_count = 0; update_count < 16; update_count++) { + update = trans->delayed_ref_updates; + trans->delayed_ref_updates = 0; + if (update) + btrfs_run_delayed_refs(trans, root, update); + else + break; + } } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { @@ -5457,6 +4890,7 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, root->root_key.objectid, trans->transid, key.objectid); BUG_ON(ret); + ret = btrfs_free_extent(trans, root, bytenr, num_bytes, leaf->start, btrfs_header_owner(leaf), @@ -5768,9 +5202,6 @@ static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, ref_path, NULL, NULL); BUG_ON(ret); - if (root == root->fs_info->extent_root) - btrfs_extent_post_op(trans, root); - return 0; } @@ -6038,6 +5469,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; + path->leave_spinning = 1; ret = btrfs_insert_empty_inode(trans, root, path, objectid); if (ret) goto out; @@ -6208,6 +5640,9 @@ again: btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); mutex_unlock(&root->fs_info->cleaner_mutex); + trans = btrfs_start_transaction(info->tree_root, 1); + btrfs_commit_transaction(trans, info->tree_root); + while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) @@ -6466,7 +5901,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, extent_root = root->fs_info->extent_root; - root->fs_info->last_trans_new_blockgroup = trans->transid; + root->fs_info->last_trans_log_full_commit = trans->transid; cache = kzalloc(sizeof(*cache), GFP_NOFS); if (!cache) @@ -6500,9 +5935,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, sizeof(cache->item)); BUG_ON(ret); - finish_current_insert(trans, extent_root, 0); - ret = del_pending_extents(trans, extent_root, 0); - BUG_ON(ret); set_avail_alloc_bits(extent_root->fs_info, type); return 0; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ebe6b29e6069..08085af089e2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3124,20 +3124,15 @@ void free_extent_buffer(struct extent_buffer *eb) int clear_extent_buffer_dirty(struct extent_io_tree *tree, struct extent_buffer *eb) { - int set; unsigned long i; unsigned long num_pages; struct page *page; - u64 start = eb->start; - u64 end = start + eb->len - 1; - - set = clear_extent_dirty(tree, start, end, GFP_NOFS); num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); - if (!set && !PageDirty(page)) + if (!PageDirty(page)) continue; lock_page(page); @@ -3146,22 +3141,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, else set_page_private(page, EXTENT_PAGE_PRIVATE); - /* - * if we're on the last page or the first page and the - * block isn't aligned on a page boundary, do extra checks - * to make sure we don't clean page that is partially dirty - */ - if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || - ((i == num_pages - 1) && - ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { - start = (u64)page->index << PAGE_CACHE_SHIFT; - end = start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(tree, start, end, - EXTENT_DIRTY, 0)) { - unlock_page(page); - continue; - } - } clear_page_dirty_for_io(page); spin_lock_irq(&page->mapping->tree_lock); if (!PageDirty(page)) { @@ -3187,29 +3166,13 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, { unsigned long i; unsigned long num_pages; + int was_dirty = 0; + was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); num_pages = num_extent_pages(eb->start, eb->len); - for (i = 0; i < num_pages; i++) { - struct page *page = extent_buffer_page(eb, i); - /* writepage may need to do something special for the - * first page, we have to make sure page->private is - * properly set. releasepage may drop page->private - * on us if the page isn't already dirty. - */ - lock_page(page); - if (i == 0) { - set_page_extent_head(page, eb->len); - } else if (PagePrivate(page) && - page->private != EXTENT_PAGE_PRIVATE) { - set_page_extent_mapped(page); - } + for (i = 0; i < num_pages; i++) __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); - set_extent_dirty(tree, page_offset(page), - page_offset(page) + PAGE_CACHE_SIZE - 1, - GFP_NOFS); - unlock_page(page); - } - return 0; + return was_dirty; } int clear_extent_buffer_uptodate(struct extent_io_tree *tree, @@ -3789,6 +3752,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) ret = 0; goto out; } + if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { + ret = 0; + goto out; + } /* at this point we can safely release the extent buffer */ num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1f9df88afbf6..5bc20abf3f3d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -25,6 +25,7 @@ /* these are bit numbers for test/set bit */ #define EXTENT_BUFFER_UPTODATE 0 #define EXTENT_BUFFER_BLOCKING 1 +#define EXTENT_BUFFER_DIRTY 2 /* * page->private values. Every page that is controlled by the extent @@ -254,6 +255,8 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, struct extent_buffer *eb); int set_extent_buffer_dirty(struct extent_io_tree *tree, struct extent_buffer *eb); +int test_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb); int set_extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb); int clear_extent_buffer_uptodate(struct extent_io_tree *tree, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 964652435fd1..9b99886562d0 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -52,6 +52,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, file_key.offset = pos; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); + path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(*item)); if (ret < 0) @@ -523,6 +524,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, key.offset = end_byte - 1; key.type = BTRFS_EXTENT_CSUM_KEY; + path->leave_spinning = 1; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) { if (path->slots[0] == 0) @@ -757,8 +759,10 @@ insert: } else { ins_size = csum_size; } + path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, root, path, &file_key, ins_size); + path->leave_spinning = 0; if (ret < 0) goto fail_unlock; if (ret != 0) { @@ -776,7 +780,6 @@ found: item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + btrfs_item_size_nr(leaf, path->slots[0])); eb_token = NULL; - cond_resched(); next_sector: if (!eb_token || @@ -817,9 +820,9 @@ next_sector: eb_token = NULL; } btrfs_mark_buffer_dirty(path->nodes[0]); - cond_resched(); if (total_bytes < sums->len) { btrfs_release_path(root, path); + cond_resched(); goto again; } out: diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index dc78954861b3..9c9fb46ccd08 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -606,6 +606,7 @@ next_slot: btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); btrfs_release_path(root, path); + path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); BUG_ON(ret); @@ -639,17 +640,22 @@ next_slot: ram_bytes); btrfs_set_file_extent_type(leaf, extent, found_type); + btrfs_unlock_up_safe(path, 1); btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_set_lock_blocking(path->nodes[0]); if (disk_bytenr != 0) { ret = btrfs_update_extent_ref(trans, root, - disk_bytenr, orig_parent, + disk_bytenr, + le64_to_cpu(old.disk_num_bytes), + orig_parent, leaf->start, root->root_key.objectid, trans->transid, ins.objectid); BUG_ON(ret); } + path->leave_spinning = 0; btrfs_release_path(root, path); if (disk_bytenr != 0) inode_add_bytes(inode, extent_end - end); @@ -912,7 +918,7 @@ again: btrfs_set_file_extent_other_encoding(leaf, fi, 0); if (orig_parent != leaf->start) { - ret = btrfs_update_extent_ref(trans, root, bytenr, + ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes, orig_parent, leaf->start, root->root_key.objectid, trans->transid, inode->i_ino); @@ -1155,6 +1161,20 @@ out_nolock: page_cache_release(pinned[1]); *ppos = pos; + /* + * we want to make sure fsync finds this change + * but we haven't joined a transaction running right now. + * + * Later on, someone is sure to update the inode and get the + * real transid recorded. + * + * We set last_trans now to the fs_info generation + 1, + * this will either be one more than the running transaction + * or the generation used for the next transaction if there isn't + * one running right now. + */ + BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; + if (num_written > 0 && will_write) { struct btrfs_trans_handle *trans; @@ -1167,8 +1187,11 @@ out_nolock: ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); if (ret == 0) { - btrfs_sync_log(trans, root); - btrfs_end_transaction(trans, root); + ret = btrfs_sync_log(trans, root); + if (ret == 0) + btrfs_end_transaction(trans, root); + else + btrfs_commit_transaction(trans, root); } else { btrfs_commit_transaction(trans, root); } @@ -1185,6 +1208,18 @@ out_nolock: int btrfs_release_file(struct inode *inode, struct file *filp) { + /* + * ordered_data_close is set by settattr when we are about to truncate + * a file from a non-zero size to a zero size. This tries to + * flush down new bytes that may have been written if the + * application were using truncate to replace a file in place. + */ + if (BTRFS_I(inode)->ordered_data_close) { + BTRFS_I(inode)->ordered_data_close = 0; + btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); + if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) + filemap_flush(inode->i_mapping); + } if (filp->private_data) btrfs_ioctl_trans_end(filp); return 0; @@ -1260,8 +1295,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) if (ret > 0) { ret = btrfs_commit_transaction(trans, root); } else { - btrfs_sync_log(trans, root); - ret = btrfs_end_transaction(trans, root); + ret = btrfs_sync_log(trans, root); + if (ret == 0) + ret = btrfs_end_transaction(trans, root); + else + ret = btrfs_commit_transaction(trans, root); } mutex_lock(&dentry->d_inode->i_mutex); out: diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 3d46fa1f29a4..6b627c611808 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -73,6 +73,8 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; + path->leave_spinning = 1; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) { ret = -ENOENT; @@ -127,6 +129,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; + path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, root, path, &key, ins_len); if (ret == -EEXIST) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7d4f948bc22a..06d8db5afb08 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -134,6 +134,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; + path->leave_spinning = 1; btrfs_set_trans_block_group(trans, inode); key.objectid = inode->i_ino; @@ -167,9 +168,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, cur_size = min_t(unsigned long, compressed_size, PAGE_CACHE_SIZE); - kaddr = kmap(cpage); + kaddr = kmap_atomic(cpage, KM_USER0); write_extent_buffer(leaf, kaddr, ptr, cur_size); - kunmap(cpage); + kunmap_atomic(kaddr, KM_USER0); i++; ptr += cur_size; @@ -204,7 +205,7 @@ fail: * does the checks required to make sure the data is small enough * to fit as an inline extent. */ -static int cow_file_range_inline(struct btrfs_trans_handle *trans, +static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, size_t compressed_size, @@ -854,11 +855,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, u64 cur_end; int limit = 10 * 1024 * 1042; - if (!btrfs_test_opt(root, COMPRESS)) { - return cow_file_range(inode, locked_page, start, end, - page_started, nr_written, 1); - } - clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | EXTENT_DELALLOC, 1, 0, GFP_NOFS); while (start < end) { @@ -935,7 +931,8 @@ static noinline int csum_exist_in_range(struct btrfs_root *root, * If no cow copies or snapshots exist, we write directly to the existing * blocks on disk */ -static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, +static noinline int run_delalloc_nocow(struct inode *inode, + struct page *locked_page, u64 start, u64 end, int *page_started, int force, unsigned long *nr_written) { @@ -1133,6 +1130,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, unsigned long *nr_written) { int ret; + struct btrfs_root *root = BTRFS_I(inode)->root; if (btrfs_test_flag(inode, NODATACOW)) ret = run_delalloc_nocow(inode, locked_page, start, end, @@ -1140,10 +1138,12 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, else if (btrfs_test_flag(inode, PREALLOC)) ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); + else if (!btrfs_test_opt(root, COMPRESS)) + ret = cow_file_range(inode, locked_page, start, end, + page_started, nr_written, 1); else ret = cow_file_range_async(inode, locked_page, start, end, page_started, nr_written); - return ret; } @@ -1453,6 +1453,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); + path->leave_spinning = 1; ret = btrfs_drop_extents(trans, root, inode, file_pos, file_pos + num_bytes, file_pos, &hint); BUG_ON(ret); @@ -1475,6 +1476,10 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_compression(leaf, fi, compression); btrfs_set_file_extent_encryption(leaf, fi, encryption); btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); + + btrfs_unlock_up_safe(path, 1); + btrfs_set_lock_blocking(leaf); + btrfs_mark_buffer_dirty(leaf); inode_add_bytes(inode, num_bytes); @@ -1487,11 +1492,35 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, root->root_key.objectid, trans->transid, inode->i_ino, &ins); BUG_ON(ret); - btrfs_free_path(path); + return 0; } +/* + * helper function for btrfs_finish_ordered_io, this + * just reads in some of the csum leaves to prime them into ram + * before we start the transaction. It limits the amount of btree + * reads required while inside the transaction. + */ +static noinline void reada_csum(struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_ordered_extent *ordered_extent) +{ + struct btrfs_ordered_sum *sum; + u64 bytenr; + + sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum, + list); + bytenr = sum->sums[0].bytenr; + + /* + * we don't care about the results, the point of this search is + * just to get the btree leaves into ram + */ + btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0); +} + /* as ordered data IO finishes, this gets called so we can finish * an ordered extent if the range of bytes in the file it covers are * fully written. @@ -1500,8 +1529,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - struct btrfs_ordered_extent *ordered_extent; + struct btrfs_ordered_extent *ordered_extent = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_path *path; int compressed = 0; int ret; @@ -1509,9 +1539,33 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) if (!ret) return 0; + /* + * before we join the transaction, try to do some of our IO. + * This will limit the amount of IO that we have to do with + * the transaction running. We're unlikely to need to do any + * IO if the file extents are new, the disk_i_size checks + * covers the most common case. + */ + if (start < BTRFS_I(inode)->disk_i_size) { + path = btrfs_alloc_path(); + if (path) { + ret = btrfs_lookup_file_extent(NULL, root, path, + inode->i_ino, + start, 0); + ordered_extent = btrfs_lookup_ordered_extent(inode, + start); + if (!list_empty(&ordered_extent->list)) { + btrfs_release_path(root, path); + reada_csum(root, path, ordered_extent); + } + btrfs_free_path(path); + } + } + trans = btrfs_join_transaction(root, 1); - ordered_extent = btrfs_lookup_ordered_extent(inode, start); + if (!ordered_extent) + ordered_extent = btrfs_lookup_ordered_extent(inode, start); BUG_ON(!ordered_extent); if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) goto nocow; @@ -2101,6 +2155,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); + path->leave_spinning = 1; ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, 1); if (ret) { @@ -2147,6 +2202,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, goto err; } + path->leave_spinning = 1; di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, name, name_len, -1); if (IS_ERR(di)) { @@ -2190,8 +2246,6 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, dir->i_ino); BUG_ON(ret != 0 && ret != -ENOENT); - if (ret != -ENOENT) - BTRFS_I(dir)->log_dirty_trans = trans->transid; ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, index); @@ -2224,6 +2278,9 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); + + btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); + ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, dentry->d_name.name, dentry->d_name.len); @@ -2498,6 +2555,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, key.type = (u8)-1; search_again: + path->leave_spinning = 1; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) goto error; @@ -2644,6 +2702,7 @@ delete: break; } if (found_extent) { + btrfs_set_path_blocking(path); ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, leaf->start, root_owner, @@ -2848,11 +2907,21 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - if (S_ISREG(inode->i_mode) && - attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { - err = btrfs_cont_expand(inode, attr->ia_size); - if (err) - return err; + if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { + if (attr->ia_size > inode->i_size) { + err = btrfs_cont_expand(inode, attr->ia_size); + if (err) + return err; + } else if (inode->i_size > 0 && + attr->ia_size == 0) { + + /* we're truncating a file that used to have good + * data down to zero. Make sure it gets into + * the ordered flush list so that any new writes + * get down to disk quickly. + */ + BTRFS_I(inode)->ordered_data_close = 1; + } } err = inode_setattr(inode, attr); @@ -2984,13 +3053,14 @@ static noinline void init_btrfs_i(struct inode *inode) bi->disk_i_size = 0; bi->flags = 0; bi->index_cnt = (u64)-1; - bi->log_dirty_trans = 0; + bi->last_unlink_trans = 0; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, inode->i_mapping, GFP_NOFS); INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); + INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); mutex_init(&BTRFS_I(inode)->extent_mutex); mutex_init(&BTRFS_I(inode)->log_mutex); @@ -3449,6 +3519,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, sizes[0] = sizeof(struct btrfs_inode_item); sizes[1] = name_len + sizeof(*ref); + path->leave_spinning = 1; ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2); if (ret != 0) goto fail; @@ -3727,6 +3798,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; nr = trans->blocks_used; + + btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { @@ -4292,8 +4365,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. */ -int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) +int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = fdentry(vma->vm_file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; @@ -4306,10 +4380,15 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) u64 page_end; ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); - if (ret) + if (ret) { + if (ret == -ENOMEM) + ret = VM_FAULT_OOM; + else /* -ENOSPC, -EIO, etc */ + ret = VM_FAULT_SIGBUS; goto out; + } - ret = -EINVAL; + ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ again: lock_page(page); size = i_size_read(inode); @@ -4357,6 +4436,8 @@ again: } ClearPageChecked(page); set_page_dirty(page); + + BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; unlock_extent(io_tree, page_start, page_end, GFP_NOFS); out_unlock: @@ -4382,6 +4463,27 @@ static void btrfs_truncate(struct inode *inode) btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); trans = btrfs_start_transaction(root, 1); + + /* + * setattr is responsible for setting the ordered_data_close flag, + * but that is only tested during the last file release. That + * could happen well after the next commit, leaving a great big + * window where new writes may get lost if someone chooses to write + * to this file after truncating to zero + * + * The inode doesn't have any dirty data here, and so if we commit + * this is a noop. If someone immediately starts writing to the inode + * it is very likely we'll catch some of their writes in this + * transaction, and the commit will find this file on the ordered + * data list with good things to send down. + * + * This is a best effort solution, there is still a window where + * using truncate to replace the contents of the file will + * end up with a zero length file after a crash. + */ + if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) + btrfs_add_ordered_operation(trans, root, inode); + btrfs_set_trans_block_group(trans, inode); btrfs_i_size_write(inode, inode->i_size); @@ -4458,12 +4560,15 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->i_acl = BTRFS_ACL_NOT_CACHED; ei->i_default_acl = BTRFS_ACL_NOT_CACHED; INIT_LIST_HEAD(&ei->i_orphan); + INIT_LIST_HEAD(&ei->ordered_operations); return &ei->vfs_inode; } void btrfs_destroy_inode(struct inode *inode) { struct btrfs_ordered_extent *ordered; + struct btrfs_root *root = BTRFS_I(inode)->root; + WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); @@ -4474,13 +4579,24 @@ void btrfs_destroy_inode(struct inode *inode) BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) posix_acl_release(BTRFS_I(inode)->i_default_acl); - spin_lock(&BTRFS_I(inode)->root->list_lock); + /* + * Make sure we're properly removed from the ordered operation + * lists. + */ + smp_mb(); + if (!list_empty(&BTRFS_I(inode)->ordered_operations)) { + spin_lock(&root->fs_info->ordered_extent_lock); + list_del_init(&BTRFS_I(inode)->ordered_operations); + spin_unlock(&root->fs_info->ordered_extent_lock); + } + + spin_lock(&root->list_lock); if (!list_empty(&BTRFS_I(inode)->i_orphan)) { printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" " list\n", inode->i_ino); dump_stack(); } - spin_unlock(&BTRFS_I(inode)->root->list_lock); + spin_unlock(&root->list_lock); while (1) { ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); @@ -4605,8 +4721,36 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (ret) goto out_unlock; + /* + * we're using rename to replace one file with another. + * and the replacement file is large. Start IO on it now so + * we don't add too much work to the end of the transaction + */ + if (new_inode && old_inode && S_ISREG(old_inode->i_mode) && + new_inode->i_size && + old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) + filemap_flush(old_inode->i_mapping); + trans = btrfs_start_transaction(root, 1); + /* + * make sure the inode gets flushed if it is replacing + * something. + */ + if (new_inode && new_inode->i_size && + old_inode && S_ISREG(old_inode->i_mode)) { + btrfs_add_ordered_operation(trans, root, old_inode); + } + + /* + * this is an ugly little race, but the rename is required to make + * sure that if we crash, the inode is either at the old name + * or the new one. pinning the log transaction lets us make sure + * we don't allow a log commit to come in after we unlink the + * name but before we add the new name back in. + */ + btrfs_pin_log_trans(root); + btrfs_set_trans_block_group(trans, new_dir); btrfs_inc_nlink(old_dentry->d_inode); @@ -4614,6 +4758,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; + if (old_dentry->d_parent != new_dentry->d_parent) + btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); + ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, old_dentry->d_name.name, old_dentry->d_name.len); @@ -4645,7 +4792,14 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (ret) goto out_fail; + btrfs_log_new_name(trans, old_inode, old_dir, + new_dentry->d_parent); out_fail: + + /* this btrfs_end_log_trans just allows the current + * log-sub transaction to complete + */ + btrfs_end_log_trans(root); btrfs_end_transaction_throttle(trans, root); out_unlock: return ret; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 47b0a88c12a2..a5310c0f41e2 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -71,12 +71,13 @@ void btrfs_clear_lock_blocking(struct extent_buffer *eb) static int btrfs_spin_on_block(struct extent_buffer *eb) { int i; + for (i = 0; i < 512; i++) { - cpu_relax(); if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) return 1; if (need_resched()) break; + cpu_relax(); } return 0; } @@ -95,13 +96,15 @@ int btrfs_try_spin_lock(struct extent_buffer *eb) { int i; - spin_nested(eb); - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 1; - spin_unlock(&eb->lock); - + if (btrfs_spin_on_block(eb)) { + spin_nested(eb); + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) + return 1; + spin_unlock(&eb->lock); + } /* spin for a bit on the BLOCKING flag */ for (i = 0; i < 2; i++) { + cpu_relax(); if (!btrfs_spin_on_block(eb)) break; @@ -148,6 +151,9 @@ int btrfs_tree_lock(struct extent_buffer *eb) DEFINE_WAIT(wait); wait.func = btrfs_wake_function; + if (!btrfs_spin_on_block(eb)) + goto sleep; + while(1) { spin_nested(eb); @@ -165,9 +171,10 @@ int btrfs_tree_lock(struct extent_buffer *eb) * spin for a bit, and if the blocking flag goes away, * loop around */ + cpu_relax(); if (btrfs_spin_on_block(eb)) continue; - +sleep: prepare_to_wait_exclusive(&eb->lock_wq, &wait, TASK_UNINTERRUPTIBLE); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 77c2411a5f0f..53c87b197d70 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -310,6 +310,16 @@ int btrfs_remove_ordered_extent(struct inode *inode, spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); list_del_init(&entry->root_extent_list); + + /* + * we have no more ordered extents for this inode and + * no dirty pages. We can safely remove it from the + * list of ordered extents + */ + if (RB_EMPTY_ROOT(&tree->tree) && + !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { + list_del_init(&BTRFS_I(inode)->ordered_operations); + } spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); mutex_unlock(&tree->mutex); @@ -370,6 +380,68 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) } /* + * this is used during transaction commit to write all the inodes + * added to the ordered operation list. These files must be fully on + * disk before the transaction commits. + * + * we have two modes here, one is to just start the IO via filemap_flush + * and the other is to wait for all the io. When we wait, we have an + * extra check to make sure the ordered operation list really is empty + * before we return + */ +int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) +{ + struct btrfs_inode *btrfs_inode; + struct inode *inode; + struct list_head splice; + + INIT_LIST_HEAD(&splice); + + mutex_lock(&root->fs_info->ordered_operations_mutex); + spin_lock(&root->fs_info->ordered_extent_lock); +again: + list_splice_init(&root->fs_info->ordered_operations, &splice); + + while (!list_empty(&splice)) { + btrfs_inode = list_entry(splice.next, struct btrfs_inode, + ordered_operations); + + inode = &btrfs_inode->vfs_inode; + + list_del_init(&btrfs_inode->ordered_operations); + + /* + * the inode may be getting freed (in sys_unlink path). + */ + inode = igrab(inode); + + if (!wait && inode) { + list_add_tail(&BTRFS_I(inode)->ordered_operations, + &root->fs_info->ordered_operations); + } + spin_unlock(&root->fs_info->ordered_extent_lock); + + if (inode) { + if (wait) + btrfs_wait_ordered_range(inode, 0, (u64)-1); + else + filemap_flush(inode->i_mapping); + iput(inode); + } + + cond_resched(); + spin_lock(&root->fs_info->ordered_extent_lock); + } + if (wait && !list_empty(&root->fs_info->ordered_operations)) + goto again; + + spin_unlock(&root->fs_info->ordered_extent_lock); + mutex_unlock(&root->fs_info->ordered_operations_mutex); + + return 0; +} + +/* * Used to start IO or wait for a given ordered extent to finish. * * If wait is one, this effectively waits on page writeback for all the pages @@ -726,3 +798,49 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping, return ret; } + +/* + * add a given inode to the list of inodes that must be fully on + * disk before a transaction commit finishes. + * + * This basically gives us the ext3 style data=ordered mode, and it is mostly + * used to make sure renamed files are fully on disk. + * + * It is a noop if the inode is already fully on disk. + * + * If trans is not null, we'll do a friendly check for a transaction that + * is already flushing things and force the IO down ourselves. + */ +int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + u64 last_mod; + + last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); + + /* + * if this file hasn't been changed since the last transaction + * commit, we can safely return without doing anything + */ + if (last_mod < root->fs_info->last_trans_committed) + return 0; + + /* + * the transaction is already committing. Just start the IO and + * don't bother with all of this list nonsense + */ + if (trans && root->fs_info->running_transaction->blocked) { + btrfs_wait_ordered_range(inode, 0, (u64)-1); + return 0; + } + + spin_lock(&root->fs_info->ordered_extent_lock); + if (list_empty(&BTRFS_I(inode)->ordered_operations)) { + list_add_tail(&BTRFS_I(inode)->ordered_operations, + &root->fs_info->ordered_operations); + } + spin_unlock(&root->fs_info->ordered_extent_lock); + + return 0; +} diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index ab66d5e8d6d6..3d31c8827b01 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -155,4 +155,8 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping, int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); +int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); +int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4112d53d4f4d..664782c6a2df 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -65,6 +65,15 @@ static noinline int join_transaction(struct btrfs_root *root) cur_trans->use_count = 1; cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); + + cur_trans->delayed_refs.root.rb_node = NULL; + cur_trans->delayed_refs.num_entries = 0; + cur_trans->delayed_refs.num_heads_ready = 0; + cur_trans->delayed_refs.num_heads = 0; + cur_trans->delayed_refs.flushing = 0; + cur_trans->delayed_refs.run_delayed_start = 0; + spin_lock_init(&cur_trans->delayed_refs.lock); + INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, @@ -182,6 +191,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, h->block_group = 0; h->alloc_exclude_nr = 0; h->alloc_exclude_start = 0; + h->delayed_ref_updates = 0; + root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); return h; @@ -271,7 +282,6 @@ void btrfs_throttle(struct btrfs_root *root) if (!root->fs_info->open_ioctl_trans) wait_current_trans(root); mutex_unlock(&root->fs_info->trans_mutex); - throttle_on_drops(root); } @@ -280,6 +290,27 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, { struct btrfs_transaction *cur_trans; struct btrfs_fs_info *info = root->fs_info; + int count = 0; + + while (count < 4) { + unsigned long cur = trans->delayed_ref_updates; + trans->delayed_ref_updates = 0; + if (cur && + trans->transaction->delayed_refs.num_heads_ready > 64) { + trans->delayed_ref_updates = 0; + + /* + * do a full flush if the transaction is trying + * to close + */ + if (trans->transaction->delayed_refs.flushing) + cur = 0; + btrfs_run_delayed_refs(trans, root, cur); + } else { + break; + } + count++; + } mutex_lock(&info->trans_mutex); cur_trans = info->running_transaction; @@ -424,9 +455,10 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, u64 old_root_bytenr; struct btrfs_root *tree_root = root->fs_info->tree_root; - btrfs_extent_post_op(trans, root); btrfs_write_dirty_block_groups(trans, root); - btrfs_extent_post_op(trans, root); + + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + BUG_ON(ret); while (1) { old_root_bytenr = btrfs_root_bytenr(&root->root_item); @@ -438,14 +470,14 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, btrfs_header_level(root->node)); btrfs_set_root_generation(&root->root_item, trans->transid); - btrfs_extent_post_op(trans, root); - ret = btrfs_update_root(trans, tree_root, &root->root_key, &root->root_item); BUG_ON(ret); btrfs_write_dirty_block_groups(trans, root); - btrfs_extent_post_op(trans, root); + + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + BUG_ON(ret); } return 0; } @@ -459,15 +491,18 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; struct list_head *next; struct extent_buffer *eb; + int ret; - btrfs_extent_post_op(trans, fs_info->tree_root); + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + BUG_ON(ret); eb = btrfs_lock_root_node(fs_info->tree_root); - btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb, 0); + btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); btrfs_tree_unlock(eb); free_extent_buffer(eb); - btrfs_extent_post_op(trans, fs_info->tree_root); + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + BUG_ON(ret); while (!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; @@ -475,6 +510,9 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, root = list_entry(next, struct btrfs_root, dirty_list); update_cowonly_root(trans, root); + + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + BUG_ON(ret); } return 0; } @@ -635,6 +673,31 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) } /* + * when dropping snapshots, we generate a ton of delayed refs, and it makes + * sense not to join the transaction while it is trying to flush the current + * queue of delayed refs out. + * + * This is used by the drop snapshot code only + */ +static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) +{ + DEFINE_WAIT(wait); + + mutex_lock(&info->trans_mutex); + while (info->running_transaction && + info->running_transaction->delayed_refs.flushing) { + prepare_to_wait(&info->transaction_wait, &wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&info->trans_mutex); + schedule(); + mutex_lock(&info->trans_mutex); + finish_wait(&info->transaction_wait, &wait); + } + mutex_unlock(&info->trans_mutex); + return 0; +} + +/* * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on * all of them */ @@ -661,7 +724,22 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, atomic_inc(&root->fs_info->throttles); while (1) { + /* + * we don't want to jump in and create a bunch of + * delayed refs if the transaction is starting to close + */ + wait_transaction_pre_flush(tree_root->fs_info); trans = btrfs_start_transaction(tree_root, 1); + + /* + * we've joined a transaction, make sure it isn't + * closing right now + */ + if (trans->transaction->delayed_refs.flushing) { + btrfs_end_transaction(trans, tree_root); + continue; + } + mutex_lock(&root->fs_info->drop_mutex); ret = btrfs_drop_snapshot(trans, dirty->root); if (ret != -EAGAIN) @@ -766,7 +844,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); old = btrfs_lock_root_node(root); - btrfs_cow_block(trans, root, old, NULL, 0, &old, 0); + btrfs_cow_block(trans, root, old, NULL, 0, &old); btrfs_copy_root(trans, root, old, &tmp, objectid); btrfs_tree_unlock(old); @@ -894,12 +972,31 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct extent_io_tree *pinned_copy; DEFINE_WAIT(wait); int ret; + int should_grow = 0; + unsigned long now = get_seconds(); + + btrfs_run_ordered_operations(root, 0); + + /* make a pass through all the delayed refs we have so far + * any runnings procs may add more while we are here + */ + ret = btrfs_run_delayed_refs(trans, root, 0); + BUG_ON(ret); + + cur_trans = trans->transaction; + /* + * set the flushing flag so procs in this transaction have to + * start sending their work down. + */ + cur_trans->delayed_refs.flushing = 1; + + ret = btrfs_run_delayed_refs(trans, root, 0); + BUG_ON(ret); - INIT_LIST_HEAD(&dirty_fs_roots); mutex_lock(&root->fs_info->trans_mutex); - if (trans->transaction->in_commit) { - cur_trans = trans->transaction; - trans->transaction->use_count++; + INIT_LIST_HEAD(&dirty_fs_roots); + if (cur_trans->in_commit) { + cur_trans->use_count++; mutex_unlock(&root->fs_info->trans_mutex); btrfs_end_transaction(trans, root); @@ -922,7 +1019,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, trans->transaction->in_commit = 1; trans->transaction->blocked = 1; - cur_trans = trans->transaction; if (cur_trans->list.prev != &root->fs_info->trans_list) { prev_trans = list_entry(cur_trans->list.prev, struct btrfs_transaction, list); @@ -937,6 +1033,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } } + if (now < cur_trans->start_time || now - cur_trans->start_time < 1) + should_grow = 1; + do { int snap_pending = 0; joined = cur_trans->num_joined; @@ -949,7 +1048,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (cur_trans->num_writers > 1) timeout = MAX_SCHEDULE_TIMEOUT; - else + else if (should_grow) timeout = 1; mutex_unlock(&root->fs_info->trans_mutex); @@ -959,16 +1058,30 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, BUG_ON(ret); } - schedule_timeout(timeout); + /* + * rename don't use btrfs_join_transaction, so, once we + * set the transaction to blocked above, we aren't going + * to get any new ordered operations. We can safely run + * it here and no for sure that nothing new will be added + * to the list + */ + btrfs_run_ordered_operations(root, 1); + + smp_mb(); + if (cur_trans->num_writers > 1 || should_grow) + schedule_timeout(timeout); mutex_lock(&root->fs_info->trans_mutex); finish_wait(&cur_trans->writer_wait, &wait); } while (cur_trans->num_writers > 1 || - (cur_trans->num_joined != joined)); + (should_grow && cur_trans->num_joined != joined)); ret = create_pending_snapshots(trans, root->fs_info); BUG_ON(ret); + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + BUG_ON(ret); + WARN_ON(cur_trans != trans->transaction); /* btrfs_commit_tree_roots is responsible for getting the @@ -1032,6 +1145,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_copy_pinned(root, pinned_copy); trans->transaction->blocked = 0; + wake_up(&root->fs_info->transaction_throttle); wake_up(&root->fs_info->transaction_wait); @@ -1058,6 +1172,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; + root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ea292117f882..94f5bde2b58d 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -19,10 +19,16 @@ #ifndef __BTRFS_TRANSACTION__ #define __BTRFS_TRANSACTION__ #include "btrfs_inode.h" +#include "delayed-ref.h" struct btrfs_transaction { u64 transid; + /* + * total writers in this transaction, it must be zero before the + * transaction can end + */ unsigned long num_writers; + unsigned long num_joined; int in_commit; int use_count; @@ -34,6 +40,7 @@ struct btrfs_transaction { wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; struct list_head pending_snapshots; + struct btrfs_delayed_ref_root delayed_refs; }; struct btrfs_trans_handle { @@ -44,6 +51,7 @@ struct btrfs_trans_handle { u64 block_group; u64 alloc_exclude_start; u64 alloc_exclude_nr; + unsigned long delayed_ref_updates; }; struct btrfs_pending_snapshot { diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 98d25fa4570e..b10eacdb1620 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -124,8 +124,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, } btrfs_release_path(root, path); - if (is_extent) - btrfs_extent_post_op(trans, root); out: if (path) btrfs_free_path(path); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9c462fbd60fa..fc9b87a7975b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -35,6 +35,49 @@ #define LOG_INODE_EXISTS 1 /* + * directory trouble cases + * + * 1) on rename or unlink, if the inode being unlinked isn't in the fsync + * log, we must force a full commit before doing an fsync of the directory + * where the unlink was done. + * ---> record transid of last unlink/rename per directory + * + * mkdir foo/some_dir + * normal commit + * rename foo/some_dir foo2/some_dir + * mkdir foo/some_dir + * fsync foo/some_dir/some_file + * + * The fsync above will unlink the original some_dir without recording + * it in its new location (foo2). After a crash, some_dir will be gone + * unless the fsync of some_file forces a full commit + * + * 2) we must log any new names for any file or dir that is in the fsync + * log. ---> check inode while renaming/linking. + * + * 2a) we must log any new names for any file or dir during rename + * when the directory they are being removed from was logged. + * ---> check inode and old parent dir during rename + * + * 2a is actually the more important variant. With the extra logging + * a crash might unlink the old name without recreating the new one + * + * 3) after a crash, we must go through any directories with a link count + * of zero and redo the rm -rf + * + * mkdir f1/foo + * normal commit + * rm -rf f1/foo + * fsync(f1) + * + * The directory f1 was fully removed from the FS, but fsync was never + * called on f1, only its parent dir. After a crash the rm -rf must + * be replayed. This must be able to recurse down the entire + * directory tree. The inode link count fixup code takes care of the + * ugly details. + */ + +/* * stages for the tree walking. The first * stage (0) is to only pin down the blocks we find * the second stage (1) is to make sure that all the inodes @@ -47,12 +90,17 @@ #define LOG_WALK_REPLAY_INODES 1 #define LOG_WALK_REPLAY_ALL 2 -static int __btrfs_log_inode(struct btrfs_trans_handle *trans, +static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, int inode_only); static int link_to_fixup_dir(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid); +static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_root *log, + struct btrfs_path *path, + u64 dirid, int del_all); /* * tree logging is a special write ahead log used to make sure that @@ -133,10 +181,25 @@ static int join_running_log_trans(struct btrfs_root *root) } /* + * This either makes the current running log transaction wait + * until you call btrfs_end_log_trans() or it makes any future + * log transactions wait until you call btrfs_end_log_trans() + */ +int btrfs_pin_log_trans(struct btrfs_root *root) +{ + int ret = -ENOENT; + + mutex_lock(&root->log_mutex); + atomic_inc(&root->log_writers); + mutex_unlock(&root->log_mutex); + return ret; +} + +/* * indicate we're done making changes to the log tree * and wake up anyone waiting to do a sync */ -static int end_log_trans(struct btrfs_root *root) +int btrfs_end_log_trans(struct btrfs_root *root) { if (atomic_dec_and_test(&root->log_writers)) { smp_mb(); @@ -203,7 +266,6 @@ static int process_one_buffer(struct btrfs_root *log, mutex_lock(&log->fs_info->pinned_mutex); btrfs_update_pinned_extents(log->fs_info->extent_root, eb->start, eb->len, 1); - mutex_unlock(&log->fs_info->pinned_mutex); } if (btrfs_buffer_uptodate(eb, gen)) { @@ -603,6 +665,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, ret = link_to_fixup_dir(trans, root, path, location.objectid); BUG_ON(ret); + ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); BUG_ON(ret); kfree(name); @@ -804,6 +867,7 @@ conflict_again: victim_name_len)) { btrfs_inc_nlink(inode); btrfs_release_path(root, path); + ret = btrfs_unlink_inode(trans, root, dir, inode, victim_name, victim_name_len); @@ -922,13 +986,20 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, key.offset--; btrfs_release_path(root, path); } - btrfs_free_path(path); + btrfs_release_path(root, path); if (nlink != inode->i_nlink) { inode->i_nlink = nlink; btrfs_update_inode(trans, root, inode); } BTRFS_I(inode)->index_cnt = (u64)-1; + if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { + ret = replay_dir_deletes(trans, root, NULL, path, + inode->i_ino, 1); + BUG_ON(ret); + } + btrfs_free_path(path); + return 0; } @@ -971,9 +1042,12 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, iput(inode); - if (key.offset == 0) - break; - key.offset--; + /* + * fixup on a directory may create new entries, + * make sure we always look for the highset possible + * offset + */ + key.offset = (u64)-1; } btrfs_release_path(root, path); return 0; @@ -1313,11 +1387,11 @@ again: read_extent_buffer(eb, name, (unsigned long)(di + 1), name_len); log_di = NULL; - if (dir_key->type == BTRFS_DIR_ITEM_KEY) { + if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) { log_di = btrfs_lookup_dir_item(trans, log, log_path, dir_key->objectid, name, name_len, 0); - } else if (dir_key->type == BTRFS_DIR_INDEX_KEY) { + } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) { log_di = btrfs_lookup_dir_index_item(trans, log, log_path, dir_key->objectid, @@ -1378,7 +1452,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_root *log, struct btrfs_path *path, - u64 dirid) + u64 dirid, int del_all) { u64 range_start; u64 range_end; @@ -1408,10 +1482,14 @@ again: range_start = 0; range_end = 0; while (1) { - ret = find_dir_range(log, path, dirid, key_type, - &range_start, &range_end); - if (ret != 0) - break; + if (del_all) + range_end = (u64)-1; + else { + ret = find_dir_range(log, path, dirid, key_type, + &range_start, &range_end); + if (ret != 0) + break; + } dir_key.offset = range_start; while (1) { @@ -1437,7 +1515,8 @@ again: break; ret = check_item_in_log(trans, root, log, path, - log_path, dir, &found_key); + log_path, dir, + &found_key); BUG_ON(ret); if (found_key.offset == (u64)-1) break; @@ -1514,7 +1593,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, mode = btrfs_inode_mode(eb, inode_item); if (S_ISDIR(mode)) { ret = replay_dir_deletes(wc->trans, - root, log, path, key.objectid); + root, log, path, key.objectid, 0); BUG_ON(ret); } ret = overwrite_item(wc->trans, root, path, @@ -1533,6 +1612,17 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, root, inode, inode->i_size, BTRFS_EXTENT_DATA_KEY); BUG_ON(ret); + + /* if the nlink count is zero here, the iput + * will free the inode. We bump it to make + * sure it doesn't get freed until the link + * count fixup is done + */ + if (inode->i_nlink == 0) { + btrfs_inc_nlink(inode); + btrfs_update_inode(wc->trans, + root, inode); + } iput(inode); } ret = link_to_fixup_dir(wc->trans, root, @@ -1840,7 +1930,8 @@ static int update_log_root(struct btrfs_trans_handle *trans, return ret; } -static int wait_log_commit(struct btrfs_root *root, unsigned long transid) +static int wait_log_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root, unsigned long transid) { DEFINE_WAIT(wait); int index = transid % 2; @@ -1854,9 +1945,12 @@ static int wait_log_commit(struct btrfs_root *root, unsigned long transid) prepare_to_wait(&root->log_commit_wait[index], &wait, TASK_UNINTERRUPTIBLE); mutex_unlock(&root->log_mutex); - if (root->log_transid < transid + 2 && + + if (root->fs_info->last_trans_log_full_commit != + trans->transid && root->log_transid < transid + 2 && atomic_read(&root->log_commit[index])) schedule(); + finish_wait(&root->log_commit_wait[index], &wait); mutex_lock(&root->log_mutex); } while (root->log_transid < transid + 2 && @@ -1864,14 +1958,16 @@ static int wait_log_commit(struct btrfs_root *root, unsigned long transid) return 0; } -static int wait_for_writer(struct btrfs_root *root) +static int wait_for_writer(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { DEFINE_WAIT(wait); while (atomic_read(&root->log_writers)) { prepare_to_wait(&root->log_writer_wait, &wait, TASK_UNINTERRUPTIBLE); mutex_unlock(&root->log_mutex); - if (atomic_read(&root->log_writers)) + if (root->fs_info->last_trans_log_full_commit != + trans->transid && atomic_read(&root->log_writers)) schedule(); mutex_lock(&root->log_mutex); finish_wait(&root->log_writer_wait, &wait); @@ -1882,7 +1978,14 @@ static int wait_for_writer(struct btrfs_root *root) /* * btrfs_sync_log does sends a given tree log down to the disk and * updates the super blocks to record it. When this call is done, - * you know that any inodes previously logged are safely on disk + * you know that any inodes previously logged are safely on disk only + * if it returns 0. + * + * Any other return value means you need to call btrfs_commit_transaction. + * Some of the edge cases for fsyncing directories that have had unlinks + * or renames done in the past mean that sometimes the only safe + * fsync is to commit the whole FS. When btrfs_sync_log returns -EAGAIN, + * that has happened. */ int btrfs_sync_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) @@ -1896,7 +1999,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_lock(&root->log_mutex); index1 = root->log_transid % 2; if (atomic_read(&root->log_commit[index1])) { - wait_log_commit(root, root->log_transid); + wait_log_commit(trans, root, root->log_transid); mutex_unlock(&root->log_mutex); return 0; } @@ -1904,18 +2007,26 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, /* wait for previous tree log sync to complete */ if (atomic_read(&root->log_commit[(index1 + 1) % 2])) - wait_log_commit(root, root->log_transid - 1); + wait_log_commit(trans, root, root->log_transid - 1); while (1) { unsigned long batch = root->log_batch; mutex_unlock(&root->log_mutex); schedule_timeout_uninterruptible(1); mutex_lock(&root->log_mutex); - wait_for_writer(root); + + wait_for_writer(trans, root); if (batch == root->log_batch) break; } + /* bail out if we need to do a full commit */ + if (root->fs_info->last_trans_log_full_commit == trans->transid) { + ret = -EAGAIN; + mutex_unlock(&root->log_mutex); + goto out; + } + ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); BUG_ON(ret); @@ -1951,16 +2062,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, index2 = log_root_tree->log_transid % 2; if (atomic_read(&log_root_tree->log_commit[index2])) { - wait_log_commit(log_root_tree, log_root_tree->log_transid); + wait_log_commit(trans, log_root_tree, + log_root_tree->log_transid); mutex_unlock(&log_root_tree->log_mutex); goto out; } atomic_set(&log_root_tree->log_commit[index2], 1); - if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) - wait_log_commit(log_root_tree, log_root_tree->log_transid - 1); + if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { + wait_log_commit(trans, log_root_tree, + log_root_tree->log_transid - 1); + } + + wait_for_writer(trans, log_root_tree); - wait_for_writer(log_root_tree); + /* + * now that we've moved on to the tree of log tree roots, + * check the full commit flag again + */ + if (root->fs_info->last_trans_log_full_commit == trans->transid) { + mutex_unlock(&log_root_tree->log_mutex); + ret = -EAGAIN; + goto out_wake_log_root; + } ret = btrfs_write_and_wait_marked_extents(log_root_tree, &log_root_tree->dirty_log_pages); @@ -1985,7 +2109,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, * in and cause problems either. */ write_ctree_super(trans, root->fs_info->tree_root, 2); + ret = 0; +out_wake_log_root: atomic_set(&log_root_tree->log_commit[index2], 0); smp_mb(); if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) @@ -1998,7 +2124,8 @@ out: return 0; } -/* * free all the extents used by the tree log. This should be called +/* + * free all the extents used by the tree log. This should be called * at commit time of the full transaction */ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) @@ -2132,7 +2259,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, btrfs_free_path(path); mutex_unlock(&BTRFS_I(dir)->log_mutex); - end_log_trans(root); + btrfs_end_log_trans(root); return 0; } @@ -2159,7 +2286,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, dirid, &index); mutex_unlock(&BTRFS_I(inode)->log_mutex); - end_log_trans(root); + btrfs_end_log_trans(root); return ret; } @@ -2559,7 +2686,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, * * This handles both files and directories. */ -static int __btrfs_log_inode(struct btrfs_trans_handle *trans, +static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, int inode_only) { @@ -2585,28 +2712,17 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, min_key.offset = 0; max_key.objectid = inode->i_ino; + + /* today the code can only do partial logging of directories */ + if (!S_ISDIR(inode->i_mode)) + inode_only = LOG_INODE_ALL; + if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) max_key.type = BTRFS_XATTR_ITEM_KEY; else max_key.type = (u8)-1; max_key.offset = (u64)-1; - /* - * if this inode has already been logged and we're in inode_only - * mode, we don't want to delete the things that have already - * been written to the log. - * - * But, if the inode has been through an inode_only log, - * the logged_trans field is not set. This allows us to catch - * any new names for this inode in the backrefs by logging it - * again - */ - if (inode_only == LOG_INODE_EXISTS && - BTRFS_I(inode)->logged_trans == trans->transid) { - btrfs_free_path(path); - btrfs_free_path(dst_path); - goto out; - } mutex_lock(&BTRFS_I(inode)->log_mutex); /* @@ -2693,7 +2809,6 @@ next_slot: if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { btrfs_release_path(root, path); btrfs_release_path(log, dst_path); - BTRFS_I(inode)->log_dirty_trans = 0; ret = log_directory_changes(trans, root, inode, path, dst_path); BUG_ON(ret); } @@ -2702,19 +2817,69 @@ next_slot: btrfs_free_path(path); btrfs_free_path(dst_path); -out: return 0; } -int btrfs_log_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - int inode_only) +/* + * follow the dentry parent pointers up the chain and see if any + * of the directories in it require a full commit before they can + * be logged. Returns zero if nothing special needs to be done or 1 if + * a full commit is required. + */ +static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, + struct inode *inode, + struct dentry *parent, + struct super_block *sb, + u64 last_committed) { - int ret; + int ret = 0; + struct btrfs_root *root; - start_log_trans(trans, root); - ret = __btrfs_log_inode(trans, root, inode, inode_only); - end_log_trans(root); + /* + * for regular files, if its inode is already on disk, we don't + * have to worry about the parents at all. This is because + * we can use the last_unlink_trans field to record renames + * and other fun in this file. + */ + if (S_ISREG(inode->i_mode) && + BTRFS_I(inode)->generation <= last_committed && + BTRFS_I(inode)->last_unlink_trans <= last_committed) + goto out; + + if (!S_ISDIR(inode->i_mode)) { + if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) + goto out; + inode = parent->d_inode; + } + + while (1) { + BTRFS_I(inode)->logged_trans = trans->transid; + smp_mb(); + + if (BTRFS_I(inode)->last_unlink_trans > last_committed) { + root = BTRFS_I(inode)->root; + + /* + * make sure any commits to the log are forced + * to be full commits + */ + root->fs_info->last_trans_log_full_commit = + trans->transid; + ret = 1; + break; + } + + if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) + break; + + if (parent == sb->s_root) + break; + + parent = parent->d_parent; + inode = parent->d_inode; + + } +out: return ret; } @@ -2724,31 +2889,65 @@ int btrfs_log_inode(struct btrfs_trans_handle *trans, * only logging is done of any parent directories that are older than * the last committed transaction */ -int btrfs_log_dentry(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct dentry *dentry) +int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + struct dentry *parent, int exists_only) { - int inode_only = LOG_INODE_ALL; + int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; struct super_block *sb; - int ret; + int ret = 0; + u64 last_committed = root->fs_info->last_trans_committed; + + sb = inode->i_sb; + + if (root->fs_info->last_trans_log_full_commit > + root->fs_info->last_trans_committed) { + ret = 1; + goto end_no_trans; + } + + ret = check_parent_dirs_for_sync(trans, inode, parent, + sb, last_committed); + if (ret) + goto end_no_trans; start_log_trans(trans, root); - sb = dentry->d_inode->i_sb; - while (1) { - ret = __btrfs_log_inode(trans, root, dentry->d_inode, - inode_only); - BUG_ON(ret); - inode_only = LOG_INODE_EXISTS; - dentry = dentry->d_parent; - if (!dentry || !dentry->d_inode || sb != dentry->d_inode->i_sb) + ret = btrfs_log_inode(trans, root, inode, inode_only); + BUG_ON(ret); + + /* + * for regular files, if its inode is already on disk, we don't + * have to worry about the parents at all. This is because + * we can use the last_unlink_trans field to record renames + * and other fun in this file. + */ + if (S_ISREG(inode->i_mode) && + BTRFS_I(inode)->generation <= last_committed && + BTRFS_I(inode)->last_unlink_trans <= last_committed) + goto no_parent; + + inode_only = LOG_INODE_EXISTS; + while (1) { + if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) break; - if (BTRFS_I(dentry->d_inode)->generation <= - root->fs_info->last_trans_committed) + inode = parent->d_inode; + if (BTRFS_I(inode)->generation > + root->fs_info->last_trans_committed) { + ret = btrfs_log_inode(trans, root, inode, inode_only); + BUG_ON(ret); + } + if (parent == sb->s_root) break; + + parent = parent->d_parent; } - end_log_trans(root); - return 0; +no_parent: + ret = 0; + btrfs_end_log_trans(root); +end_no_trans: + return ret; } /* @@ -2760,12 +2959,8 @@ int btrfs_log_dentry(struct btrfs_trans_handle *trans, int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct dentry *dentry) { - u64 gen; - gen = root->fs_info->last_trans_new_blockgroup; - if (gen > root->fs_info->last_trans_committed) - return 1; - else - return btrfs_log_dentry(trans, root, dentry); + return btrfs_log_inode_parent(trans, root, dentry->d_inode, + dentry->d_parent, 0); } /* @@ -2884,3 +3079,94 @@ again: kfree(log_root_tree); return 0; } + +/* + * there are some corner cases where we want to force a full + * commit instead of allowing a directory to be logged. + * + * They revolve around files there were unlinked from the directory, and + * this function updates the parent directory so that a full commit is + * properly done if it is fsync'd later after the unlinks are done. + */ +void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, + struct inode *dir, struct inode *inode, + int for_rename) +{ + /* + * when we're logging a file, if it hasn't been renamed + * or unlinked, and its inode is fully committed on disk, + * we don't have to worry about walking up the directory chain + * to log its parents. + * + * So, we use the last_unlink_trans field to put this transid + * into the file. When the file is logged we check it and + * don't log the parents if the file is fully on disk. + */ + if (S_ISREG(inode->i_mode)) + BTRFS_I(inode)->last_unlink_trans = trans->transid; + + /* + * if this directory was already logged any new + * names for this file/dir will get recorded + */ + smp_mb(); + if (BTRFS_I(dir)->logged_trans == trans->transid) + return; + + /* + * if the inode we're about to unlink was logged, + * the log will be properly updated for any new names + */ + if (BTRFS_I(inode)->logged_trans == trans->transid) + return; + + /* + * when renaming files across directories, if the directory + * there we're unlinking from gets fsync'd later on, there's + * no way to find the destination directory later and fsync it + * properly. So, we have to be conservative and force commits + * so the new name gets discovered. + */ + if (for_rename) + goto record; + + /* we can safely do the unlink without any special recording */ + return; + +record: + BTRFS_I(dir)->last_unlink_trans = trans->transid; +} + +/* + * Call this after adding a new name for a file and it will properly + * update the log to reflect the new name. + * + * It will return zero if all goes well, and it will return 1 if a + * full transaction commit is required. + */ +int btrfs_log_new_name(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *old_dir, + struct dentry *parent) +{ + struct btrfs_root * root = BTRFS_I(inode)->root; + + /* + * this will force the logging code to walk the dentry chain + * up for the file + */ + if (S_ISREG(inode->i_mode)) + BTRFS_I(inode)->last_unlink_trans = trans->transid; + + /* + * if this inode hasn't been logged and directory we're renaming it + * from hasn't been logged, we don't need to log it + */ + if (BTRFS_I(inode)->logged_trans <= + root->fs_info->last_trans_committed && + (!old_dir || BTRFS_I(old_dir)->logged_trans <= + root->fs_info->last_trans_committed)) + return 0; + + return btrfs_log_inode_parent(trans, root, inode, parent, 1); +} + diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index b9409b32ed02..d09c7609e16b 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -22,14 +22,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_log_dentry(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct dentry *dentry); int btrfs_recover_log_trees(struct btrfs_root *tree_root); int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct dentry *dentry); -int btrfs_log_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - int inode_only); int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, @@ -38,4 +33,16 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, struct inode *inode, u64 dirid); +int btrfs_join_running_log_trans(struct btrfs_root *root); +int btrfs_end_log_trans(struct btrfs_root *root); +int btrfs_pin_log_trans(struct btrfs_root *root); +int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + struct dentry *parent, int exists_only); +void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, + struct inode *dir, struct inode *inode, + int for_rename); +int btrfs_log_new_name(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *old_dir, + struct dentry *parent); #endif diff --git a/fs/buffer.c b/fs/buffer.c index a2fd743d97cb..f5f8b15a6e40 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -290,7 +290,7 @@ static void free_more_memory(void) &zone); if (zone) try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0, - GFP_NOFS); + GFP_NOFS, NULL); } } @@ -547,6 +547,39 @@ repeat: return err; } +void do_thaw_all(unsigned long unused) +{ + struct super_block *sb; + char b[BDEVNAME_SIZE]; + + spin_lock(&sb_lock); +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb)) + printk(KERN_WARNING "Emergency Thaw on %s\n", + bdevname(sb->s_bdev, b)); + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; + } + spin_unlock(&sb_lock); + printk(KERN_WARNING "Emergency Thaw complete\n"); +} + +/** + * emergency_thaw_all -- forcibly thaw every frozen filesystem + * + * Used for emergency unfreeze of all filesystems via SysRq + */ +void emergency_thaw_all(void) +{ + pdflush_operation(do_thaw_all, 0); +} + /** * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers * @mapping: the mapping which wants those buffers written @@ -621,14 +654,7 @@ static void __set_page_dirty(struct page *page, spin_lock_irq(&mapping->tree_lock); if (page->mapping) { /* Race with truncate? */ WARN_ON_ONCE(warn && !PageUptodate(page)); - - if (mapping_cap_account_dirty(mapping)) { - __inc_zone_page_state(page, NR_FILE_DIRTY); - __inc_bdi_stat(mapping->backing_dev_info, - BDI_RECLAIMABLE); - task_dirty_inc(current); - task_io_account_write(PAGE_CACHE_SIZE); - } + account_page_dirtied(page, mapping); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } @@ -2320,13 +2346,14 @@ int block_commit_write(struct page *page, unsigned from, unsigned to) * unlock the page. */ int -block_page_mkwrite(struct vm_area_struct *vma, struct page *page, +block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; unsigned long end; loff_t size; - int ret = -EINVAL; + int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ lock_page(page); size = i_size_read(inode); @@ -2346,6 +2373,13 @@ block_page_mkwrite(struct vm_area_struct *vma, struct page *page, if (!ret) ret = block_commit_write(page, 0, end); + if (unlikely(ret)) { + if (ret == -ENOMEM) + ret = VM_FAULT_OOM; + else /* -ENOSPC, -EIO, etc */ + ret = VM_FAULT_SIGBUS; + } + out_unlock: unlock_page(page); return ret; diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index e4a6223c3145..af737bb56cb7 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -740,8 +740,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, out_release_free_unlock: crypto_free_hash(s->hash_desc.tfm); out_free_unlock: - memset(s->block_aligned_filename, 0, s->block_aligned_filename_size); - kfree(s->block_aligned_filename); + kzfree(s->block_aligned_filename); out_unlock: mutex_unlock(s->tfm_mutex); out: diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 96ef51489e01..295e7fa56755 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -291,8 +291,7 @@ int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon) if (daemon->user_ns) put_user_ns(daemon->user_ns); mutex_unlock(&daemon->mux); - memset(daemon, 0, sizeof(*daemon)); - kfree(daemon); + kzfree(daemon); out: return rc; } diff --git a/fs/eventfd.c b/fs/eventfd.c index 5de2c2db3aa2..2a701d593d35 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -28,6 +28,7 @@ struct eventfd_ctx { * issue a wakeup. */ __u64 count; + unsigned int flags; }; /* @@ -50,7 +51,7 @@ int eventfd_signal(struct file *file, int n) n = (int) (ULLONG_MAX - ctx->count); ctx->count += n; if (waitqueue_active(&ctx->wqh)) - wake_up_locked(&ctx->wqh); + wake_up_locked_poll(&ctx->wqh, POLLIN); spin_unlock_irqrestore(&ctx->wqh.lock, flags); return n; @@ -87,22 +88,20 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, { struct eventfd_ctx *ctx = file->private_data; ssize_t res; - __u64 ucnt; + __u64 ucnt = 0; DECLARE_WAITQUEUE(wait, current); if (count < sizeof(ucnt)) return -EINVAL; spin_lock_irq(&ctx->wqh.lock); res = -EAGAIN; - ucnt = ctx->count; - if (ucnt > 0) + if (ctx->count > 0) res = sizeof(ucnt); else if (!(file->f_flags & O_NONBLOCK)) { __add_wait_queue(&ctx->wqh, &wait); for (res = 0;;) { set_current_state(TASK_INTERRUPTIBLE); if (ctx->count > 0) { - ucnt = ctx->count; res = sizeof(ucnt); break; } @@ -117,10 +116,11 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, __remove_wait_queue(&ctx->wqh, &wait); __set_current_state(TASK_RUNNING); } - if (res > 0) { - ctx->count = 0; + if (likely(res > 0)) { + ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; + ctx->count -= ucnt; if (waitqueue_active(&ctx->wqh)) - wake_up_locked(&ctx->wqh); + wake_up_locked_poll(&ctx->wqh, POLLOUT); } spin_unlock_irq(&ctx->wqh.lock); if (res > 0 && put_user(ucnt, (__u64 __user *) buf)) @@ -166,10 +166,10 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c __remove_wait_queue(&ctx->wqh, &wait); __set_current_state(TASK_RUNNING); } - if (res > 0) { + if (likely(res > 0)) { ctx->count += ucnt; if (waitqueue_active(&ctx->wqh)) - wake_up_locked(&ctx->wqh); + wake_up_locked_poll(&ctx->wqh, POLLIN); } spin_unlock_irq(&ctx->wqh.lock); @@ -207,7 +207,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); - if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK)) + if (flags & ~EFD_FLAGS_SET) return -EINVAL; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); @@ -216,13 +216,14 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) init_waitqueue_head(&ctx->wqh); ctx->count = count; + ctx->flags = flags; /* * When we call this, the initialization must be complete, since * anon_inode_getfd() will install the fd. */ fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, - flags & (O_CLOEXEC | O_NONBLOCK)); + flags & EFD_SHARED_FCNTL_FLAGS); if (fd < 0) kfree(ctx); return fd; @@ -232,3 +233,4 @@ SYSCALL_DEFINE1(eventfd, unsigned int, count) { return sys_eventfd2(count, 0); } + diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c5c424f23fd5..a89f370fadb5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1,6 +1,6 @@ /* - * fs/eventpoll.c (Efficent event polling implementation) - * Copyright (C) 2001,...,2007 Davide Libenzi + * fs/eventpoll.c (Efficient event retrieval implementation) + * Copyright (C) 2001,...,2009 Davide Libenzi * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -71,29 +71,11 @@ * a better scalability. */ -#define DEBUG_EPOLL 0 - -#if DEBUG_EPOLL > 0 -#define DPRINTK(x) printk x -#define DNPRINTK(n, x) do { if ((n) <= DEBUG_EPOLL) printk x; } while (0) -#else /* #if DEBUG_EPOLL > 0 */ -#define DPRINTK(x) (void) 0 -#define DNPRINTK(n, x) (void) 0 -#endif /* #if DEBUG_EPOLL > 0 */ - -#define DEBUG_EPI 0 - -#if DEBUG_EPI != 0 -#define EPI_SLAB_DEBUG (SLAB_DEBUG_FREE | SLAB_RED_ZONE /* | SLAB_POISON */) -#else /* #if DEBUG_EPI != 0 */ -#define EPI_SLAB_DEBUG 0 -#endif /* #if DEBUG_EPI != 0 */ - /* Epoll private bits inside the event mask */ #define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET) -/* Maximum number of poll wake up nests we are allowing */ -#define EP_MAX_POLLWAKE_NESTS 4 +/* Maximum number of nesting allowed inside epoll sets */ +#define EP_MAX_NESTS 4 /* Maximum msec timeout value storeable in a long int */ #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) @@ -110,24 +92,21 @@ struct epoll_filefd { }; /* - * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". - * It is used to keep track on all tasks that are currently inside the wake_up() code - * to 1) short-circuit the one coming from the same task and same wait queue head - * (loop) 2) allow a maximum number of epoll descriptors inclusion nesting - * 3) let go the ones coming from other tasks. + * Structure used to track possible nested calls, for too deep recursions + * and loop cycles. */ -struct wake_task_node { +struct nested_call_node { struct list_head llink; - struct task_struct *task; - wait_queue_head_t *wq; + void *cookie; + int cpu; }; /* - * This is used to implement the safe poll wake up avoiding to reenter - * the poll callback from inside wake_up(). + * This structure is used as collector for nested calls, to check for + * maximum recursion dept and loop cycles. */ -struct poll_safewake { - struct list_head wake_task_list; +struct nested_calls { + struct list_head tasks_call_list; spinlock_t lock; }; @@ -213,7 +192,7 @@ struct eppoll_entry { struct list_head llink; /* The "base" pointer is set to the container "struct epitem" */ - void *base; + struct epitem *base; /* * Wait queue item that will be linked to the target file wait @@ -231,6 +210,12 @@ struct ep_pqueue { struct epitem *epi; }; +/* Used by the ep_send_events() function as callback private data */ +struct ep_send_events_data { + int maxevents; + struct epoll_event __user *events; +}; + /* * Configuration options available inside /proc/sys/fs/epoll/ */ @@ -242,8 +227,11 @@ static int max_user_watches __read_mostly; */ static DEFINE_MUTEX(epmutex); -/* Safe wake up implementation */ -static struct poll_safewake psw; +/* Used for safe wake up implementation */ +static struct nested_calls poll_safewake_ncalls; + +/* Used to call file's f_op->poll() under the nested calls boundaries */ +static struct nested_calls poll_readywalk_ncalls; /* Slab cache used to allocate "struct epitem" */ static struct kmem_cache *epi_cache __read_mostly; @@ -312,89 +300,230 @@ static inline int ep_op_has_event(int op) } /* Initialize the poll safe wake up structure */ -static void ep_poll_safewake_init(struct poll_safewake *psw) +static void ep_nested_calls_init(struct nested_calls *ncalls) { - - INIT_LIST_HEAD(&psw->wake_task_list); - spin_lock_init(&psw->lock); + INIT_LIST_HEAD(&ncalls->tasks_call_list); + spin_lock_init(&ncalls->lock); } -/* - * Perform a safe wake up of the poll wait list. The problem is that - * with the new callback'd wake up system, it is possible that the - * poll callback is reentered from inside the call to wake_up() done - * on the poll wait queue head. The rule is that we cannot reenter the - * wake up code from the same task more than EP_MAX_POLLWAKE_NESTS times, - * and we cannot reenter the same wait queue head at all. This will - * enable to have a hierarchy of epoll file descriptor of no more than - * EP_MAX_POLLWAKE_NESTS deep. We need the irq version of the spin lock - * because this one gets called by the poll callback, that in turn is called - * from inside a wake_up(), that might be called from irq context. +/** + * ep_call_nested - Perform a bound (possibly) nested call, by checking + * that the recursion limit is not exceeded, and that + * the same nested call (by the meaning of same cookie) is + * no re-entered. + * + * @ncalls: Pointer to the nested_calls structure to be used for this call. + * @max_nests: Maximum number of allowed nesting calls. + * @nproc: Nested call core function pointer. + * @priv: Opaque data to be passed to the @nproc callback. + * @cookie: Cookie to be used to identify this nested call. + * + * Returns: Returns the code returned by the @nproc callback, or -1 if + * the maximum recursion limit has been exceeded. */ -static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) +static int ep_call_nested(struct nested_calls *ncalls, int max_nests, + int (*nproc)(void *, void *, int), void *priv, + void *cookie) { - int wake_nests = 0; + int error, call_nests = 0; unsigned long flags; - struct task_struct *this_task = current; - struct list_head *lsthead = &psw->wake_task_list; - struct wake_task_node *tncur; - struct wake_task_node tnode; + int this_cpu = get_cpu(); + struct list_head *lsthead = &ncalls->tasks_call_list; + struct nested_call_node *tncur; + struct nested_call_node tnode; - spin_lock_irqsave(&psw->lock, flags); + spin_lock_irqsave(&ncalls->lock, flags); - /* Try to see if the current task is already inside this wakeup call */ + /* + * Try to see if the current task is already inside this wakeup call. + * We use a list here, since the population inside this set is always + * very much limited. + */ list_for_each_entry(tncur, lsthead, llink) { - - if (tncur->wq == wq || - (tncur->task == this_task && ++wake_nests > EP_MAX_POLLWAKE_NESTS)) { + if (tncur->cpu == this_cpu && + (tncur->cookie == cookie || ++call_nests > max_nests)) { /* * Ops ... loop detected or maximum nest level reached. * We abort this wake by breaking the cycle itself. */ - spin_unlock_irqrestore(&psw->lock, flags); - return; + error = -1; + goto out_unlock; } } - /* Add the current task to the list */ - tnode.task = this_task; - tnode.wq = wq; + /* Add the current task and cookie to the list */ + tnode.cpu = this_cpu; + tnode.cookie = cookie; list_add(&tnode.llink, lsthead); - spin_unlock_irqrestore(&psw->lock, flags); + spin_unlock_irqrestore(&ncalls->lock, flags); - /* Do really wake up now */ - wake_up_nested(wq, 1 + wake_nests); + /* Call the nested function */ + error = (*nproc)(priv, cookie, call_nests); /* Remove the current task from the list */ - spin_lock_irqsave(&psw->lock, flags); + spin_lock_irqsave(&ncalls->lock, flags); list_del(&tnode.llink); - spin_unlock_irqrestore(&psw->lock, flags); + out_unlock: + spin_unlock_irqrestore(&ncalls->lock, flags); + + put_cpu(); + return error; +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, + unsigned long events, int subclass) +{ + unsigned long flags; + + spin_lock_irqsave_nested(&wqueue->lock, flags, subclass); + wake_up_locked_poll(wqueue, events); + spin_unlock_irqrestore(&wqueue->lock, flags); +} +#else +static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, + unsigned long events, int subclass) +{ + wake_up_poll(wqueue, events); +} +#endif + +static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) +{ + ep_wake_up_nested((wait_queue_head_t *) cookie, POLLIN, + 1 + call_nests); + return 0; +} + +/* + * Perform a safe wake up of the poll wait list. The problem is that + * with the new callback'd wake up system, it is possible that the + * poll callback is reentered from inside the call to wake_up() done + * on the poll wait queue head. The rule is that we cannot reenter the + * wake up code from the same task more than EP_MAX_NESTS times, + * and we cannot reenter the same wait queue head at all. This will + * enable to have a hierarchy of epoll file descriptor of no more than + * EP_MAX_NESTS deep. + */ +static void ep_poll_safewake(wait_queue_head_t *wq) +{ + ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, + ep_poll_wakeup_proc, NULL, wq); } /* - * This function unregister poll callbacks from the associated file descriptor. - * Since this must be called without holding "ep->lock" the atomic exchange trick - * will protect us from multiple unregister. + * This function unregisters poll callbacks from the associated file + * descriptor. Must be called with "mtx" held (or "epmutex" if called from + * ep_free). */ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) { - int nwait; struct list_head *lsthead = &epi->pwqlist; struct eppoll_entry *pwq; - /* This is called without locks, so we need the atomic exchange */ - nwait = xchg(&epi->nwait, 0); + while (!list_empty(lsthead)) { + pwq = list_first_entry(lsthead, struct eppoll_entry, llink); - if (nwait) { - while (!list_empty(lsthead)) { - pwq = list_first_entry(lsthead, struct eppoll_entry, llink); + list_del(&pwq->llink); + remove_wait_queue(pwq->whead, &pwq->wait); + kmem_cache_free(pwq_cache, pwq); + } +} - list_del_init(&pwq->llink); - remove_wait_queue(pwq->whead, &pwq->wait); - kmem_cache_free(pwq_cache, pwq); - } +/** + * ep_scan_ready_list - Scans the ready list in a way that makes possible for + * the scan code, to call f_op->poll(). Also allows for + * O(NumReady) performance. + * + * @ep: Pointer to the epoll private data structure. + * @sproc: Pointer to the scan callback. + * @priv: Private opaque data passed to the @sproc callback. + * + * Returns: The same integer error code returned by the @sproc callback. + */ +static int ep_scan_ready_list(struct eventpoll *ep, + int (*sproc)(struct eventpoll *, + struct list_head *, void *), + void *priv) +{ + int error, pwake = 0; + unsigned long flags; + struct epitem *epi, *nepi; + LIST_HEAD(txlist); + + /* + * We need to lock this because we could be hit by + * eventpoll_release_file() and epoll_ctl(). + */ + mutex_lock(&ep->mtx); + + /* + * Steal the ready list, and re-init the original one to the + * empty list. Also, set ep->ovflist to NULL so that events + * happening while looping w/out locks, are not lost. We cannot + * have the poll callback to queue directly on ep->rdllist, + * because we want the "sproc" callback to be able to do it + * in a lockless way. + */ + spin_lock_irqsave(&ep->lock, flags); + list_splice_init(&ep->rdllist, &txlist); + ep->ovflist = NULL; + spin_unlock_irqrestore(&ep->lock, flags); + + /* + * Now call the callback function. + */ + error = (*sproc)(ep, &txlist, priv); + + spin_lock_irqsave(&ep->lock, flags); + /* + * During the time we spent inside the "sproc" callback, some + * other events might have been queued by the poll callback. + * We re-insert them inside the main ready-list here. + */ + for (nepi = ep->ovflist; (epi = nepi) != NULL; + nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { + /* + * We need to check if the item is already in the list. + * During the "sproc" callback execution time, items are + * queued into ->ovflist but the "txlist" might already + * contain them, and the list_splice() below takes care of them. + */ + if (!ep_is_linked(&epi->rdllink)) + list_add_tail(&epi->rdllink, &ep->rdllist); + } + /* + * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after + * releasing the lock, events will be queued in the normal way inside + * ep->rdllist. + */ + ep->ovflist = EP_UNACTIVE_PTR; + + /* + * Quickly re-inject items left on "txlist". + */ + list_splice(&txlist, &ep->rdllist); + + if (!list_empty(&ep->rdllist)) { + /* + * Wake up (if active) both the eventpoll wait list and + * the ->poll() wait list (delayed after we release the lock). + */ + if (waitqueue_active(&ep->wq)) + wake_up_locked(&ep->wq); + if (waitqueue_active(&ep->poll_wait)) + pwake++; } + spin_unlock_irqrestore(&ep->lock, flags); + + mutex_unlock(&ep->mtx); + + /* We have to call this outside the lock */ + if (pwake) + ep_poll_safewake(&ep->poll_wait); + + return error; } /* @@ -434,9 +563,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) atomic_dec(&ep->user->epoll_watches); - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n", - current, ep, file)); - return 0; } @@ -447,7 +573,7 @@ static void ep_free(struct eventpoll *ep) /* We need to release all tasks waiting for these file */ if (waitqueue_active(&ep->poll_wait)) - ep_poll_safewake(&psw, &ep->poll_wait); + ep_poll_safewake(&ep->poll_wait); /* * We need to lock this because we could be hit by @@ -492,26 +618,54 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file) if (ep) ep_free(ep); - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep)); return 0; } +static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, + void *priv) +{ + struct epitem *epi, *tmp; + + list_for_each_entry_safe(epi, tmp, head, rdllink) { + if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & + epi->event.events) + return POLLIN | POLLRDNORM; + else { + /* + * Item has been dropped into the ready list by the poll + * callback, but it's not actually ready, as far as + * caller requested events goes. We can remove it here. + */ + list_del_init(&epi->rdllink); + } + } + + return 0; +} + +static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests) +{ + return ep_scan_ready_list(priv, ep_read_events_proc, NULL); +} + static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) { - unsigned int pollflags = 0; - unsigned long flags; + int pollflags; struct eventpoll *ep = file->private_data; /* Insert inside our poll wait queue */ poll_wait(file, &ep->poll_wait, wait); - /* Check our condition */ - spin_lock_irqsave(&ep->lock, flags); - if (!list_empty(&ep->rdllist)) - pollflags = POLLIN | POLLRDNORM; - spin_unlock_irqrestore(&ep->lock, flags); + /* + * Proceed to find out if wanted events are really available inside + * the ready list. This need to be done under ep_call_nested() + * supervision, since the call to f_op->poll() done on listed files + * could re-enter here. + */ + pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS, + ep_poll_readyevents_proc, ep, ep); - return pollflags; + return pollflags != -1 ? pollflags : 0; } /* File callbacks that implement the eventpoll file behaviour */ @@ -541,7 +695,7 @@ void eventpoll_release_file(struct file *file) * We don't want to get "file->f_lock" because it is not * necessary. It is not necessary because we're in the "struct file" * cleanup path, and this means that noone is using this file anymore. - * So, for example, epoll_ctl() cannot hit here sicne if we reach this + * So, for example, epoll_ctl() cannot hit here since if we reach this * point, the file counter already went to zero and fget() would fail. * The only hit might come from ep_free() but by holding the mutex * will correctly serialize the operation. We do need to acquire @@ -588,8 +742,6 @@ static int ep_alloc(struct eventpoll **pep) *pep = ep; - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n", - current, ep)); return 0; free_uid: @@ -623,9 +775,6 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) } } - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n", - current, file, epir)); - return epir; } @@ -641,9 +790,6 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", - current, epi->ffd.file, epi, ep)); - spin_lock_irqsave(&ep->lock, flags); /* @@ -656,6 +802,15 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k goto out_unlock; /* + * Check the events coming with the callback. At this stage, not + * every device reports the events in the "key" parameter of the + * callback. We need to be able to handle both cases here, hence the + * test for "key" != NULL before the event match test. + */ + if (key && !((unsigned long) key & epi->event.events)) + goto out_unlock; + + /* * If we are trasfering events to userspace, we can hold no locks * (because we're accessing user memory, and because of linux f_op->poll() * semantics). All the events that happens during that period of time are @@ -670,12 +825,9 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k } /* If this file is already in the ready list we exit soon */ - if (ep_is_linked(&epi->rdllink)) - goto is_linked; - - list_add_tail(&epi->rdllink, &ep->rdllist); + if (!ep_is_linked(&epi->rdllink)) + list_add_tail(&epi->rdllink, &ep->rdllist); -is_linked: /* * Wake up ( if active ) both the eventpoll wait list and the ->poll() * wait list. @@ -690,7 +842,7 @@ out_unlock: /* We have to call this outside the lock */ if (pwake) - ep_poll_safewake(&psw, &ep->poll_wait); + ep_poll_safewake(&ep->poll_wait); return 1; } @@ -817,10 +969,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* We have to call this outside the lock */ if (pwake) - ep_poll_safewake(&psw, &ep->poll_wait); - - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %p, %d)\n", - current, ep, tfile, fd)); + ep_poll_safewake(&ep->poll_wait); return 0; @@ -851,15 +1000,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even { int pwake = 0; unsigned int revents; - unsigned long flags; /* - * Set the new event interest mask before calling f_op->poll(), otherwise - * a potential race might occur. In fact if we do this operation inside - * the lock, an event might happen between the f_op->poll() call and the - * new event set registering. + * Set the new event interest mask before calling f_op->poll(); + * otherwise we might miss an event that happens between the + * f_op->poll() call and the new event set registering. */ epi->event.events = event->events; + epi->event.data = event->data; /* protected by mtx */ /* * Get current event bits. We can safely use the file* here because @@ -867,16 +1015,12 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even */ revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); - spin_lock_irqsave(&ep->lock, flags); - - /* Copy the data member from inside the lock */ - epi->event.data = event->data; - /* * If the item is "hot" and it is not registered inside the ready * list, push it inside. */ if (revents & event->events) { + spin_lock_irq(&ep->lock); if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); @@ -886,142 +1030,84 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even if (waitqueue_active(&ep->poll_wait)) pwake++; } + spin_unlock_irq(&ep->lock); } - spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */ if (pwake) - ep_poll_safewake(&psw, &ep->poll_wait); + ep_poll_safewake(&ep->poll_wait); return 0; } -static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, - int maxevents) +static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, + void *priv) { - int eventcnt, error = -EFAULT, pwake = 0; + struct ep_send_events_data *esed = priv; + int eventcnt; unsigned int revents; - unsigned long flags; - struct epitem *epi, *nepi; - struct list_head txlist; - - INIT_LIST_HEAD(&txlist); - - /* - * We need to lock this because we could be hit by - * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL). - */ - mutex_lock(&ep->mtx); - - /* - * Steal the ready list, and re-init the original one to the - * empty list. Also, set ep->ovflist to NULL so that events - * happening while looping w/out locks, are not lost. We cannot - * have the poll callback to queue directly on ep->rdllist, - * because we are doing it in the loop below, in a lockless way. - */ - spin_lock_irqsave(&ep->lock, flags); - list_splice(&ep->rdllist, &txlist); - INIT_LIST_HEAD(&ep->rdllist); - ep->ovflist = NULL; - spin_unlock_irqrestore(&ep->lock, flags); + struct epitem *epi; + struct epoll_event __user *uevent; /* - * We can loop without lock because this is a task private list. - * We just splice'd out the ep->rdllist in ep_collect_ready_items(). - * Items cannot vanish during the loop because we are holding "mtx". + * We can loop without lock because we are passed a task private list. + * Items cannot vanish during the loop because ep_scan_ready_list() is + * holding "mtx" during this call. */ - for (eventcnt = 0; !list_empty(&txlist) && eventcnt < maxevents;) { - epi = list_first_entry(&txlist, struct epitem, rdllink); + for (eventcnt = 0, uevent = esed->events; + !list_empty(head) && eventcnt < esed->maxevents;) { + epi = list_first_entry(head, struct epitem, rdllink); list_del_init(&epi->rdllink); - /* - * Get the ready file event set. We can safely use the file - * because we are holding the "mtx" and this will guarantee - * that both the file and the item will not vanish. - */ - revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); - revents &= epi->event.events; + revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & + epi->event.events; /* - * Is the event mask intersect the caller-requested one, - * deliver the event to userspace. Again, we are holding - * "mtx", so no operations coming from userspace can change - * the item. + * If the event mask intersect the caller-requested one, + * deliver the event to userspace. Again, ep_scan_ready_list() + * is holding "mtx", so no operations coming from userspace + * can change the item. */ if (revents) { - if (__put_user(revents, - &events[eventcnt].events) || - __put_user(epi->event.data, - &events[eventcnt].data)) - goto errxit; + if (__put_user(revents, &uevent->events) || + __put_user(epi->event.data, &uevent->data)) { + list_add(&epi->rdllink, head); + return eventcnt ? eventcnt : -EFAULT; + } + eventcnt++; + uevent++; if (epi->event.events & EPOLLONESHOT) epi->event.events &= EP_PRIVATE_BITS; - eventcnt++; + else if (!(epi->event.events & EPOLLET)) { + /* + * If this file has been added with Level + * Trigger mode, we need to insert back inside + * the ready list, so that the next call to + * epoll_wait() will check again the events + * availability. At this point, noone can insert + * into ep->rdllist besides us. The epoll_ctl() + * callers are locked out by + * ep_scan_ready_list() holding "mtx" and the + * poll callback will queue them in ep->ovflist. + */ + list_add_tail(&epi->rdllink, &ep->rdllist); + } } - /* - * At this point, noone can insert into ep->rdllist besides - * us. The epoll_ctl() callers are locked out by us holding - * "mtx" and the poll callback will queue them in ep->ovflist. - */ - if (!(epi->event.events & EPOLLET) && - (revents & epi->event.events)) - list_add_tail(&epi->rdllink, &ep->rdllist); - } - error = 0; - -errxit: - - spin_lock_irqsave(&ep->lock, flags); - /* - * During the time we spent in the loop above, some other events - * might have been queued by the poll callback. We re-insert them - * inside the main ready-list here. - */ - for (nepi = ep->ovflist; (epi = nepi) != NULL; - nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { - /* - * If the above loop quit with errors, the epoll item might still - * be linked to "txlist", and the list_splice() done below will - * take care of those cases. - */ - if (!ep_is_linked(&epi->rdllink)) - list_add_tail(&epi->rdllink, &ep->rdllist); } - /* - * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after - * releasing the lock, events will be queued in the normal way inside - * ep->rdllist. - */ - ep->ovflist = EP_UNACTIVE_PTR; - /* - * In case of error in the event-send loop, or in case the number of - * ready events exceeds the userspace limit, we need to splice the - * "txlist" back inside ep->rdllist. - */ - list_splice(&txlist, &ep->rdllist); - - if (!list_empty(&ep->rdllist)) { - /* - * Wake up (if active) both the eventpoll wait list and the ->poll() - * wait list (delayed after we release the lock). - */ - if (waitqueue_active(&ep->wq)) - wake_up_locked(&ep->wq); - if (waitqueue_active(&ep->poll_wait)) - pwake++; - } - spin_unlock_irqrestore(&ep->lock, flags); + return eventcnt; +} - mutex_unlock(&ep->mtx); +static int ep_send_events(struct eventpoll *ep, + struct epoll_event __user *events, int maxevents) +{ + struct ep_send_events_data esed; - /* We have to call this outside the lock */ - if (pwake) - ep_poll_safewake(&psw, &ep->poll_wait); + esed.maxevents = maxevents; + esed.events = events; - return eventcnt == 0 ? error: eventcnt; + return ep_scan_ready_list(ep, ep_send_events_proc, &esed); } static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, @@ -1033,7 +1119,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, wait_queue_t wait; /* - * Calculate the timeout by checking for the "infinite" value ( -1 ) + * Calculate the timeout by checking for the "infinite" value (-1) * and the overflow condition. The passed timeout is in milliseconds, * that why (t * HZ) / 1000. */ @@ -1076,9 +1162,8 @@ retry: set_current_state(TASK_RUNNING); } - /* Is it worth to try to dig for events ? */ - eavail = !list_empty(&ep->rdllist); + eavail = !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR; spin_unlock_irqrestore(&ep->lock, flags); @@ -1099,41 +1184,30 @@ retry: */ SYSCALL_DEFINE1(epoll_create1, int, flags) { - int error, fd = -1; - struct eventpoll *ep; + int error; + struct eventpoll *ep = NULL; /* Check the EPOLL_* constant for consistency. */ BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); if (flags & ~EPOLL_CLOEXEC) return -EINVAL; - - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", - current, flags)); - /* - * Create the internal data structure ( "struct eventpoll" ). + * Create the internal data structure ("struct eventpoll"). */ error = ep_alloc(&ep); - if (error < 0) { - fd = error; - goto error_return; - } - + if (error < 0) + return error; /* * Creates all the items needed to setup an eventpoll file. That is, * a file structure and a free file descriptor. */ - fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, - flags & O_CLOEXEC); - if (fd < 0) + error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, + flags & O_CLOEXEC); + if (error < 0) ep_free(ep); -error_return: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", - current, flags, fd)); - - return fd; + return error; } SYSCALL_DEFINE1(epoll_create, int, size) @@ -1158,9 +1232,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, struct epitem *epi; struct epoll_event epds; - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p)\n", - current, epfd, op, fd, event)); - error = -EFAULT; if (ep_op_has_event(op) && copy_from_user(&epds, event, sizeof(struct epoll_event))) @@ -1211,7 +1282,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, case EPOLL_CTL_ADD: if (!epi) { epds.events |= POLLERR | POLLHUP; - error = ep_insert(ep, &epds, tfile, fd); } else error = -EEXIST; @@ -1237,8 +1307,6 @@ error_tgt_fput: error_fput: fput(file); error_return: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p) = %d\n", - current, epfd, op, fd, event, error)); return error; } @@ -1254,9 +1322,6 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, struct file *file; struct eventpoll *ep; - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n", - current, epfd, events, maxevents, timeout)); - /* The maximum number of event must be greater than zero */ if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) return -EINVAL; @@ -1293,8 +1358,6 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, error_fput: fput(file); error_return: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n", - current, epfd, events, maxevents, timeout, error)); return error; } @@ -1359,17 +1422,18 @@ static int __init eventpoll_init(void) EP_ITEM_COST; /* Initialize the structure used to perform safe poll wait head wake ups */ - ep_poll_safewake_init(&psw); + ep_nested_calls_init(&poll_safewake_ncalls); + + /* Initialize the structure used to perform file's f_op->poll() calls */ + ep_nested_calls_init(&poll_readywalk_ncalls); /* Allocates slab cache used to allocate "struct epitem" items */ epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), - 0, SLAB_HWCACHE_ALIGN|EPI_SLAB_DEBUG|SLAB_PANIC, - NULL); + 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); /* Allocates slab cache used to allocate "struct eppoll_entry" */ pwq_cache = kmem_cache_create("eventpoll_pwq", - sizeof(struct eppoll_entry), 0, - EPI_SLAB_DEBUG|SLAB_PANIC, NULL); + sizeof(struct eppoll_entry), 0, SLAB_PANIC, NULL); return 0; } diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 38f40d55899c..53c72ad85877 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -55,7 +55,8 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block, } static int ext4_group_used_meta_blocks(struct super_block *sb, - ext4_group_t block_group) + ext4_group_t block_group, + struct ext4_group_desc *gdp) { ext4_fsblk_t tmp; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -63,10 +64,6 @@ static int ext4_group_used_meta_blocks(struct super_block *sb, int used_blocks = sbi->s_itb_per_group + 2; if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { - struct ext4_group_desc *gdp; - struct buffer_head *bh; - - gdp = ext4_get_group_desc(sb, block_group, &bh); if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) used_blocks--; @@ -177,7 +174,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, */ mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); } - return free_blocks - ext4_group_used_meta_blocks(sb, block_group); + return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp); } @@ -473,9 +470,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - spin_lock(sb_bgl_lock(sbi, flex_group)); - sbi->s_flex_groups[flex_group].free_blocks += blocks_freed; - spin_unlock(sb_bgl_lock(sbi, flex_group)); + atomic_add(blocks_freed, + &sbi->s_flex_groups[flex_group].free_blocks); } /* * request to reload the buddy with the diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 2df2e40b01af..b64789929a65 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -67,7 +67,8 @@ int ext4_check_dir_entry(const char *function, struct inode *dir, unsigned int offset) { const char *error_msg = NULL; - const int rlen = ext4_rec_len_from_disk(de->rec_len); + const int rlen = ext4_rec_len_from_disk(de->rec_len, + dir->i_sb->s_blocksize); if (rlen < EXT4_DIR_REC_LEN(1)) error_msg = "rec_len is smaller than minimal"; @@ -178,10 +179,11 @@ revalidate: * least that it is non-zero. A * failure will be detected in the * dirent test below. */ - if (ext4_rec_len_from_disk(de->rec_len) - < EXT4_DIR_REC_LEN(1)) + if (ext4_rec_len_from_disk(de->rec_len, + sb->s_blocksize) < EXT4_DIR_REC_LEN(1)) break; - i += ext4_rec_len_from_disk(de->rec_len); + i += ext4_rec_len_from_disk(de->rec_len, + sb->s_blocksize); } offset = i; filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) @@ -203,7 +205,8 @@ revalidate: ret = stored; goto out; } - offset += ext4_rec_len_from_disk(de->rec_len); + offset += ext4_rec_len_from_disk(de->rec_len, + sb->s_blocksize); if (le32_to_cpu(de->inode)) { /* We might block in the next section * if the data destination is @@ -225,7 +228,8 @@ revalidate: goto revalidate; stored++; } - filp->f_pos += ext4_rec_len_from_disk(de->rec_len); + filp->f_pos += ext4_rec_len_from_disk(de->rec_len, + sb->s_blocksize); } offset = 0; brelse(bh); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6083bb38057b..d0f15ef56de1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -33,14 +33,6 @@ #undef EXT4FS_DEBUG /* - * Define EXT4_RESERVATION to reserve data blocks for expanding files - */ -#define EXT4_DEFAULT_RESERVE_BLOCKS 8 -/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ -#define EXT4_MAX_RESERVE_BLOCKS 1027 -#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0 - -/* * Debug code */ #ifdef EXT4FS_DEBUG @@ -54,8 +46,6 @@ #define ext4_debug(f, a...) do {} while (0) #endif -#define EXT4_MULTIBLOCK_ALLOCATOR 1 - /* prefer goal again. length */ #define EXT4_MB_HINT_MERGE 1 /* blocks already reserved */ @@ -180,8 +170,9 @@ struct ext4_group_desc */ struct flex_groups { - __u32 free_inodes; - __u32 free_blocks; + atomic_t free_inodes; + atomic_t free_blocks; + atomic_t used_dirs; }; #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ @@ -249,6 +240,30 @@ struct flex_groups { #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ #define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ +/* Flags that should be inherited by new inodes from their parent. */ +#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ + EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\ + EXT4_NODUMP_FL | EXT4_NOATIME_FL |\ + EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ + EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL) + +/* Flags that are appropriate for regular files (all but dir-specific ones). */ +#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL)) + +/* Flags that are appropriate for non-directories/regular files. */ +#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL) + +/* Mask out flags that are inappropriate for the given type of inode. */ +static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & EXT4_REG_FLMASK; + else + return flags & EXT4_OTHER_FLMASK; +} + /* * Inode dynamic state flags */ @@ -256,6 +271,7 @@ struct flex_groups { #define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ +#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ /* Used to pass group descriptor data when online resize is done */ struct ext4_new_group_input { @@ -303,7 +319,9 @@ struct ext4_new_group_data { #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) #define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) #define EXT4_IOC_MIGRATE _IO('f', 9) + /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */ /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ +#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) /* * ioctl commands in 32 bit emulation @@ -531,7 +549,7 @@ do { \ #define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ #define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ -#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */ +#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ #define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */ #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ @@ -666,7 +684,8 @@ struct ext4_super_block { __u8 s_log_groups_per_flex; /* FLEX_BG group size */ __u8 s_reserved_char_pad2; __le16 s_reserved_pad; - __u32 s_reserved[162]; /* Padding to the end of the block */ + __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ + __u32 s_reserved[160]; /* Padding to the end of the block */ }; #ifdef __KERNEL__ @@ -814,6 +833,12 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */ /* + * Minimum number of groups in a flexgroup before we separate out + * directories into the first block group of a flexgroup + */ +#define EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 4 + +/* * Structure of a directory entry */ #define EXT4_NAME_LEN 255 @@ -865,24 +890,6 @@ struct ext4_dir_entry_2 { ~EXT4_DIR_ROUND) #define EXT4_MAX_REC_LEN ((1<<16)-1) -static inline unsigned ext4_rec_len_from_disk(__le16 dlen) -{ - unsigned len = le16_to_cpu(dlen); - - if (len == EXT4_MAX_REC_LEN || len == 0) - return 1 << 16; - return len; -} - -static inline __le16 ext4_rec_len_to_disk(unsigned len) -{ - if (len == (1 << 16)) - return cpu_to_le16(EXT4_MAX_REC_LEN); - else if (len > (1 << 16)) - BUG(); - return cpu_to_le16(len); -} - /* * Hash Tree Directory indexing * (c) Daniel Phillips, 2001 @@ -970,22 +977,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, extern struct proc_dir_entry *ext4_proc_root; -#ifdef CONFIG_PROC_FS -extern const struct file_operations ext4_ui_proc_fops; - -#define EXT4_PROC_HANDLER(name, var) \ -do { \ - proc = proc_create_data(name, mode, sbi->s_proc, \ - &ext4_ui_proc_fops, &sbi->s_##var); \ - if (proc == NULL) { \ - printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \ - goto err_out; \ - } \ -} while (0) -#else -#define EXT4_PROC_HANDLER(name, var) -#endif - /* * Function prototypes */ @@ -1092,13 +1083,14 @@ extern int ext4_can_truncate(struct inode *inode); extern void ext4_truncate(struct inode *); extern void ext4_set_inode_flags(struct inode *); extern void ext4_get_inode_flags(struct ext4_inode_info *); +extern int ext4_alloc_da_blocks(struct inode *inode); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from); -extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); +extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t ext4_get_reserved_space(struct inode *inode); /* ioctl.c */ @@ -1107,7 +1099,10 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); /* migrate.c */ extern int ext4_ext_migrate(struct inode *); + /* namei.c */ +extern unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize); +extern __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize); extern int ext4_orphan_add(handle_t *, struct inode *); extern int ext4_orphan_del(handle_t *, struct inode *); extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 18cb67b2cbbc..f0c3ec85bd48 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -241,5 +241,6 @@ extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, ext4_lblk_t *, ext4_fsblk_t *); extern void ext4_ext_drop_refs(struct ext4_ext_path *); +extern int ext4_ext_check_inode(struct inode *inode); #endif /* _EXT4_EXTENTS */ diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h index e69acc16f5c4..4ce2187123aa 100644 --- a/fs/ext4/ext4_i.h +++ b/fs/ext4/ext4_i.h @@ -33,9 +33,6 @@ typedef __u32 ext4_lblk_t; /* data type for block group number */ typedef unsigned int ext4_group_t; -#define rsv_start rsv_window._rsv_start -#define rsv_end rsv_window._rsv_end - /* * storage for cached extent */ @@ -125,6 +122,9 @@ struct ext4_inode_info { struct list_head i_prealloc_list; spinlock_t i_prealloc_lock; + /* ialloc */ + ext4_group_t i_last_alloc_group; + /* allocation reservation info for delalloc */ unsigned int i_reserved_data_blocks; unsigned int i_reserved_meta_blocks; diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 039b6ea1a042..57b71fefbccf 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h @@ -62,12 +62,10 @@ struct ext4_sb_info { struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; struct percpu_counter s_dirtyblocks_counter; - struct blockgroup_lock s_blockgroup_lock; + struct blockgroup_lock *s_blockgroup_lock; struct proc_dir_entry *s_proc; - - /* root of the per fs reservation window tree */ - spinlock_t s_rsv_window_lock; - struct rb_root s_rsv_window_root; + struct kobject s_kobj; + struct completion s_kobj_unregister; /* Journaling */ struct inode *s_journal_inode; @@ -146,6 +144,10 @@ struct ext4_sb_info { /* locality groups */ struct ext4_locality_group *s_locality_groups; + /* for write statistics */ + unsigned long s_sectors_written_start; + u64 s_kbytes_written; + unsigned int s_log_groups_per_flex; struct flex_groups *s_flex_groups; }; @@ -153,7 +155,7 @@ struct ext4_sb_info { static inline spinlock_t * sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group) { - return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group); + return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group); } #endif /* _EXT4_SB */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e0aa4fe4f596..ac77d8b8251d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -152,6 +152,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, ext4_fsblk_t bg_start; ext4_fsblk_t last_block; ext4_grpblk_t colour; + ext4_group_t block_group; + int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); int depth; if (path) { @@ -170,10 +172,31 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, } /* OK. use inode's group */ - bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + + block_group = ei->i_block_group; + if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { + /* + * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME + * block groups per flexgroup, reserve the first block + * group for directories and special files. Regular + * files will start at the second block group. This + * tends to speed up directory access and improves + * fsck times. + */ + block_group &= ~(flex_size-1); + if (S_ISREG(inode->i_mode)) + block_group++; + } + bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; + /* + * If we are doing delayed allocation, we don't need take + * colour into account. + */ + if (test_opt(inode->i_sb, DELALLOC)) + return bg_start; + if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) colour = (current->pid % 16) * (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); @@ -301,7 +324,64 @@ ext4_ext_max_entries(struct inode *inode, int depth) return max; } -static int __ext4_ext_check_header(const char *function, struct inode *inode, +static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) +{ + ext4_fsblk_t block = ext_pblock(ext); + int len = ext4_ext_get_actual_len(ext); + struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + if (unlikely(block < le32_to_cpu(es->s_first_data_block) || + ((block + len) > ext4_blocks_count(es)))) + return 0; + else + return 1; +} + +static int ext4_valid_extent_idx(struct inode *inode, + struct ext4_extent_idx *ext_idx) +{ + ext4_fsblk_t block = idx_pblock(ext_idx); + struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + if (unlikely(block < le32_to_cpu(es->s_first_data_block) || + (block > ext4_blocks_count(es)))) + return 0; + else + return 1; +} + +static int ext4_valid_extent_entries(struct inode *inode, + struct ext4_extent_header *eh, + int depth) +{ + struct ext4_extent *ext; + struct ext4_extent_idx *ext_idx; + unsigned short entries; + if (eh->eh_entries == 0) + return 1; + + entries = le16_to_cpu(eh->eh_entries); + + if (depth == 0) { + /* leaf entries */ + ext = EXT_FIRST_EXTENT(eh); + while (entries) { + if (!ext4_valid_extent(inode, ext)) + return 0; + ext++; + entries--; + } + } else { + ext_idx = EXT_FIRST_INDEX(eh); + while (entries) { + if (!ext4_valid_extent_idx(inode, ext_idx)) + return 0; + ext_idx++; + entries--; + } + } + return 1; +} + +static int __ext4_ext_check(const char *function, struct inode *inode, struct ext4_extent_header *eh, int depth) { @@ -329,11 +409,15 @@ static int __ext4_ext_check_header(const char *function, struct inode *inode, error_msg = "invalid eh_entries"; goto corrupted; } + if (!ext4_valid_extent_entries(inode, eh, depth)) { + error_msg = "invalid extent entries"; + goto corrupted; + } return 0; corrupted: ext4_error(inode->i_sb, function, - "bad header in inode #%lu: %s - magic %x, " + "bad header/extent in inode #%lu: %s - magic %x, " "entries %u, max %u(%u), depth %u(%u)", inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), @@ -342,8 +426,13 @@ corrupted: return -EIO; } -#define ext4_ext_check_header(inode, eh, depth) \ - __ext4_ext_check_header(__func__, inode, eh, depth) +#define ext4_ext_check(inode, eh, depth) \ + __ext4_ext_check(__func__, inode, eh, depth) + +int ext4_ext_check_inode(struct inode *inode) +{ + return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode)); +} #ifdef EXT_DEBUG static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) @@ -547,9 +636,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, eh = ext_inode_hdr(inode); depth = ext_depth(inode); - if (ext4_ext_check_header(inode, eh, depth)) - return ERR_PTR(-EIO); - /* account possible depth increase */ if (!path) { @@ -565,6 +651,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, i = depth; /* walk through the tree */ while (i) { + int need_to_validate = 0; + ext_debug("depth %d: num %d, max %d\n", ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); @@ -573,10 +661,17 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, path[ppos].p_depth = i; path[ppos].p_ext = NULL; - bh = sb_bread(inode->i_sb, path[ppos].p_block); - if (!bh) + bh = sb_getblk(inode->i_sb, path[ppos].p_block); + if (unlikely(!bh)) goto err; - + if (!bh_uptodate_or_lock(bh)) { + if (bh_submit_read(bh) < 0) { + put_bh(bh); + goto err; + } + /* validate the extent entries */ + need_to_validate = 1; + } eh = ext_block_hdr(bh); ppos++; BUG_ON(ppos > depth); @@ -584,7 +679,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, path[ppos].p_hdr = eh; i--; - if (ext4_ext_check_header(inode, eh, i)) + if (need_to_validate && ext4_ext_check(inode, eh, i)) goto err; } @@ -1181,7 +1276,7 @@ got_index: return -EIO; eh = ext_block_hdr(bh); /* subtract from p_depth to get proper eh_depth */ - if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) { + if (ext4_ext_check(inode, eh, path->p_depth - depth)) { put_bh(bh); return -EIO; } @@ -1194,7 +1289,7 @@ got_index: if (bh == NULL) return -EIO; eh = ext_block_hdr(bh); - if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) { + if (ext4_ext_check(inode, eh, path->p_depth - depth)) { put_bh(bh); return -EIO; } @@ -2137,7 +2232,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) return -ENOMEM; } path[0].p_hdr = ext_inode_hdr(inode); - if (ext4_ext_check_header(inode, path[0].p_hdr, depth)) { + if (ext4_ext_check(inode, path[0].p_hdr, depth)) { err = -EIO; goto out; } @@ -2191,7 +2286,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) err = -EIO; break; } - if (ext4_ext_check_header(inode, ext_block_hdr(bh), + if (ext4_ext_check(inode, ext_block_hdr(bh), depth - i - 1)) { err = -EIO; break; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index f731cb545a03..588af8c77246 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -33,9 +33,14 @@ */ static int ext4_release_file(struct inode *inode, struct file *filp) { + if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) { + ext4_alloc_da_blocks(inode); + EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE; + } /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && - (atomic_read(&inode->i_writecount) == 1)) + (atomic_read(&inode->i_writecount) == 1) && + !EXT4_I(inode)->i_reserved_data_blocks) { down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index fb51b40e3e8f..47b84e8df568 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -189,7 +189,6 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) struct ext4_super_block *es; struct ext4_sb_info *sbi; int fatal = 0, err, count, cleared; - ext4_group_t flex_group; if (atomic_read(&inode->i_count) > 1) { printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", @@ -268,6 +267,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) if (is_directory) { count = ext4_used_dirs_count(sb, gdp) - 1; ext4_used_dirs_set(sb, gdp, count); + if (sbi->s_log_groups_per_flex) { + ext4_group_t f; + + f = ext4_flex_group(sbi, block_group); + atomic_dec(&sbi->s_flex_groups[f].free_inodes); + } + } gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); @@ -277,10 +283,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) percpu_counter_dec(&sbi->s_dirs_counter); if (sbi->s_log_groups_per_flex) { - flex_group = ext4_flex_group(sbi, block_group); - spin_lock(sb_bgl_lock(sbi, flex_group)); - sbi->s_flex_groups[flex_group].free_inodes++; - spin_unlock(sb_bgl_lock(sbi, flex_group)); + ext4_group_t f; + + f = ext4_flex_group(sbi, block_group); + atomic_inc(&sbi->s_flex_groups[f].free_inodes); } } BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); @@ -360,9 +366,9 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, sbi->s_log_groups_per_flex; find_close_to_parent: - flexbg_free_blocks = flex_group[best_flex].free_blocks; + flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; - if (flex_group[best_flex].free_inodes && + if (atomic_read(&flex_group[best_flex].free_inodes) && flex_freeb_ratio > free_block_ratio) goto found_flexbg; @@ -375,24 +381,24 @@ find_close_to_parent: if (i == parent_fbg_group || i == parent_fbg_group - 1) continue; - flexbg_free_blocks = flex_group[i].free_blocks; + flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; if (flex_freeb_ratio > free_block_ratio && - flex_group[i].free_inodes) { + (atomic_read(&flex_group[i].free_inodes))) { best_flex = i; goto found_flexbg; } - if (flex_group[best_flex].free_inodes == 0 || - (flex_group[i].free_blocks > - flex_group[best_flex].free_blocks && - flex_group[i].free_inodes)) + if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || + ((atomic_read(&flex_group[i].free_blocks) > + atomic_read(&flex_group[best_flex].free_blocks)) && + atomic_read(&flex_group[i].free_inodes))) best_flex = i; } - if (!flex_group[best_flex].free_inodes || - !flex_group[best_flex].free_blocks) + if (!atomic_read(&flex_group[best_flex].free_inodes) || + !atomic_read(&flex_group[best_flex].free_blocks)) return -1; found_flexbg: @@ -410,6 +416,42 @@ out: return 0; } +struct orlov_stats { + __u32 free_inodes; + __u32 free_blocks; + __u32 used_dirs; +}; + +/* + * Helper function for Orlov's allocator; returns critical information + * for a particular block group or flex_bg. If flex_size is 1, then g + * is a block group number; otherwise it is flex_bg number. + */ +void get_orlov_stats(struct super_block *sb, ext4_group_t g, + int flex_size, struct orlov_stats *stats) +{ + struct ext4_group_desc *desc; + struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; + + if (flex_size > 1) { + stats->free_inodes = atomic_read(&flex_group[g].free_inodes); + stats->free_blocks = atomic_read(&flex_group[g].free_blocks); + stats->used_dirs = atomic_read(&flex_group[g].used_dirs); + return; + } + + desc = ext4_get_group_desc(sb, g, NULL); + if (desc) { + stats->free_inodes = ext4_free_inodes_count(sb, desc); + stats->free_blocks = ext4_free_blks_count(sb, desc); + stats->used_dirs = ext4_used_dirs_count(sb, desc); + } else { + stats->free_inodes = 0; + stats->free_blocks = 0; + stats->used_dirs = 0; + } +} + /* * Orlov's allocator for directories. * @@ -425,35 +467,34 @@ out: * it has too many directories already (max_dirs) or * it has too few free inodes left (min_inodes) or * it has too few free blocks left (min_blocks) or - * it's already running too large debt (max_debt). * Parent's group is preferred, if it doesn't satisfy these * conditions we search cyclically through the rest. If none * of the groups look good we just look for a group with more * free inodes than average (starting at parent's group). - * - * Debt is incremented each time we allocate a directory and decremented - * when we allocate an inode, within 0--255. */ -#define INODE_COST 64 -#define BLOCK_COST 256 - static int find_group_orlov(struct super_block *sb, struct inode *parent, - ext4_group_t *group) + ext4_group_t *group, int mode) { ext4_group_t parent_group = EXT4_I(parent)->i_block_group; struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = sbi->s_es; ext4_group_t ngroups = sbi->s_groups_count; int inodes_per_group = EXT4_INODES_PER_GROUP(sb); unsigned int freei, avefreei; ext4_fsblk_t freeb, avefreeb; - ext4_fsblk_t blocks_per_dir; unsigned int ndirs; - int max_debt, max_dirs, min_inodes; + int max_dirs, min_inodes; ext4_grpblk_t min_blocks; - ext4_group_t i; + ext4_group_t i, grp, g; struct ext4_group_desc *desc; + struct orlov_stats stats; + int flex_size = ext4_flex_bg_size(sbi); + + if (flex_size > 1) { + ngroups = (ngroups + flex_size - 1) >> + sbi->s_log_groups_per_flex; + parent_group >>= sbi->s_log_groups_per_flex; + } freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); avefreei = freei / ngroups; @@ -462,71 +503,97 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, do_div(avefreeb, ngroups); ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); - if ((parent == sb->s_root->d_inode) || - (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { + if (S_ISDIR(mode) && + ((parent == sb->s_root->d_inode) || + (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) { int best_ndir = inodes_per_group; - ext4_group_t grp; int ret = -1; get_random_bytes(&grp, sizeof(grp)); parent_group = (unsigned)grp % ngroups; for (i = 0; i < ngroups; i++) { - grp = (parent_group + i) % ngroups; - desc = ext4_get_group_desc(sb, grp, NULL); - if (!desc || !ext4_free_inodes_count(sb, desc)) + g = (parent_group + i) % ngroups; + get_orlov_stats(sb, g, flex_size, &stats); + if (!stats.free_inodes) continue; - if (ext4_used_dirs_count(sb, desc) >= best_ndir) + if (stats.used_dirs >= best_ndir) continue; - if (ext4_free_inodes_count(sb, desc) < avefreei) + if (stats.free_inodes < avefreei) continue; - if (ext4_free_blks_count(sb, desc) < avefreeb) + if (stats.free_blocks < avefreeb) continue; - *group = grp; + grp = g; ret = 0; - best_ndir = ext4_used_dirs_count(sb, desc); + best_ndir = stats.used_dirs; + } + if (ret) + goto fallback; + found_flex_bg: + if (flex_size == 1) { + *group = grp; + return 0; + } + + /* + * We pack inodes at the beginning of the flexgroup's + * inode tables. Block allocation decisions will do + * something similar, although regular files will + * start at 2nd block group of the flexgroup. See + * ext4_ext_find_goal() and ext4_find_near(). + */ + grp *= flex_size; + for (i = 0; i < flex_size; i++) { + if (grp+i >= sbi->s_groups_count) + break; + desc = ext4_get_group_desc(sb, grp+i, NULL); + if (desc && ext4_free_inodes_count(sb, desc)) { + *group = grp+i; + return 0; + } } - if (ret == 0) - return ret; goto fallback; } - blocks_per_dir = ext4_blocks_count(es) - freeb; - do_div(blocks_per_dir, ndirs); - max_dirs = ndirs / ngroups + inodes_per_group / 16; - min_inodes = avefreei - inodes_per_group / 4; - min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4; - - max_debt = EXT4_BLOCKS_PER_GROUP(sb); - max_debt /= max_t(int, blocks_per_dir, BLOCK_COST); - if (max_debt * INODE_COST > inodes_per_group) - max_debt = inodes_per_group / INODE_COST; - if (max_debt > 255) - max_debt = 255; - if (max_debt == 0) - max_debt = 1; + min_inodes = avefreei - inodes_per_group*flex_size / 4; + if (min_inodes < 1) + min_inodes = 1; + min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4; + + /* + * Start looking in the flex group where we last allocated an + * inode for this parent directory + */ + if (EXT4_I(parent)->i_last_alloc_group != ~0) { + parent_group = EXT4_I(parent)->i_last_alloc_group; + if (flex_size > 1) + parent_group >>= sbi->s_log_groups_per_flex; + } for (i = 0; i < ngroups; i++) { - *group = (parent_group + i) % ngroups; - desc = ext4_get_group_desc(sb, *group, NULL); - if (!desc || !ext4_free_inodes_count(sb, desc)) - continue; - if (ext4_used_dirs_count(sb, desc) >= max_dirs) + grp = (parent_group + i) % ngroups; + get_orlov_stats(sb, grp, flex_size, &stats); + if (stats.used_dirs >= max_dirs) continue; - if (ext4_free_inodes_count(sb, desc) < min_inodes) + if (stats.free_inodes < min_inodes) continue; - if (ext4_free_blks_count(sb, desc) < min_blocks) + if (stats.free_blocks < min_blocks) continue; - return 0; + goto found_flex_bg; } fallback: + ngroups = sbi->s_groups_count; + avefreei = freei / ngroups; + parent_group = EXT4_I(parent)->i_block_group; for (i = 0; i < ngroups; i++) { - *group = (parent_group + i) % ngroups; - desc = ext4_get_group_desc(sb, *group, NULL); + grp = (parent_group + i) % ngroups; + desc = ext4_get_group_desc(sb, grp, NULL); if (desc && ext4_free_inodes_count(sb, desc) && - ext4_free_inodes_count(sb, desc) >= avefreei) + ext4_free_inodes_count(sb, desc) >= avefreei) { + *group = grp; return 0; + } } if (avefreei) { @@ -542,12 +609,51 @@ fallback: } static int find_group_other(struct super_block *sb, struct inode *parent, - ext4_group_t *group) + ext4_group_t *group, int mode) { ext4_group_t parent_group = EXT4_I(parent)->i_block_group; ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; struct ext4_group_desc *desc; - ext4_group_t i; + ext4_group_t i, last; + int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); + + /* + * Try to place the inode is the same flex group as its + * parent. If we can't find space, use the Orlov algorithm to + * find another flex group, and store that information in the + * parent directory's inode information so that use that flex + * group for future allocations. + */ + if (flex_size > 1) { + int retry = 0; + + try_again: + parent_group &= ~(flex_size-1); + last = parent_group + flex_size; + if (last > ngroups) + last = ngroups; + for (i = parent_group; i < last; i++) { + desc = ext4_get_group_desc(sb, i, NULL); + if (desc && ext4_free_inodes_count(sb, desc)) { + *group = i; + return 0; + } + } + if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) { + retry = 1; + parent_group = EXT4_I(parent)->i_last_alloc_group; + goto try_again; + } + /* + * If this didn't work, use the Orlov search algorithm + * to find a new flex group; we pass in the mode to + * avoid the topdir algorithms. + */ + *group = parent_group + flex_size; + if (*group > ngroups) + *group = 0; + return find_group_orlov(sb, parent, group, mode); + } /* * Try to place the inode in its parent directory @@ -665,6 +771,11 @@ static int ext4_claim_inode(struct super_block *sb, if (S_ISDIR(mode)) { count = ext4_used_dirs_count(sb, gdp) + 1; ext4_used_dirs_set(sb, gdp, count); + if (sbi->s_log_groups_per_flex) { + ext4_group_t f = ext4_flex_group(sbi, group); + + atomic_inc(&sbi->s_flex_groups[f].free_inodes); + } } gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); err_ret: @@ -716,10 +827,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) sbi = EXT4_SB(sb); es = sbi->s_es; - if (sbi->s_log_groups_per_flex) { + if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { ret2 = find_group_flex(sb, dir, &group); if (ret2 == -1) { - ret2 = find_group_other(sb, dir, &group); + ret2 = find_group_other(sb, dir, &group, mode); if (ret2 == 0 && once) once = 0; printk(KERN_NOTICE "ext4: find_group_flex " @@ -733,11 +844,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) if (test_opt(sb, OLDALLOC)) ret2 = find_group_dir(sb, dir, &group); else - ret2 = find_group_orlov(sb, dir, &group); + ret2 = find_group_orlov(sb, dir, &group, mode); } else - ret2 = find_group_other(sb, dir, &group); + ret2 = find_group_other(sb, dir, &group, mode); got_group: + EXT4_I(dir)->i_last_alloc_group = group; err = -ENOSPC; if (ret2 == -1) goto out; @@ -858,9 +970,7 @@ got: if (sbi->s_log_groups_per_flex) { flex_group = ext4_flex_group(sbi, group); - spin_lock(sb_bgl_lock(sbi, flex_group)); - sbi->s_flex_groups[flex_group].free_inodes--; - spin_unlock(sb_bgl_lock(sbi, flex_group)); + atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); } inode->i_uid = current_fsuid(); @@ -885,19 +995,16 @@ got: ei->i_disksize = 0; /* - * Don't inherit extent flag from directory. We set extent flag on - * newly created directory and file only if -o extent mount option is - * specified + * Don't inherit extent flag from directory, amongst others. We set + * extent flag on newly created directory and file only if -o extent + * mount option is specified */ - ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL); - if (S_ISLNK(mode)) - ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL); - /* dirsync only applies to directories */ - if (!S_ISDIR(mode)) - ei->i_flags &= ~EXT4_DIRSYNC_FL; + ei->i_flags = + ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED); ei->i_file_acl = 0; ei->i_dtime = 0; ei->i_block_group = group; + ei->i_last_alloc_group = ~0; ext4_set_inode_flags(inode); if (IS_DIRSYNC(inode)) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 71d3ecd5db79..a2e7952bc5f9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -371,6 +371,34 @@ static int ext4_block_to_path(struct inode *inode, return n; } +static int __ext4_check_blockref(const char *function, struct inode *inode, + unsigned int *p, unsigned int max) { + + unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es); + unsigned int *bref = p; + while (bref < p+max) { + if (unlikely(*bref >= maxblocks)) { + ext4_error(inode->i_sb, function, + "block reference %u >= max (%u) " + "in inode #%lu, offset=%d", + *bref, maxblocks, + inode->i_ino, (int)(bref-p)); + return -EIO; + } + bref++; + } + return 0; +} + + +#define ext4_check_indirect_blockref(inode, bh) \ + __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \ + EXT4_ADDR_PER_BLOCK((inode)->i_sb)) + +#define ext4_check_inode_blockref(inode) \ + __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \ + EXT4_NDIR_BLOCKS) + /** * ext4_get_branch - read the chain of indirect blocks leading to data * @inode: inode in question @@ -415,9 +443,22 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, if (!p->key) goto no_block; while (--depth) { - bh = sb_bread(sb, le32_to_cpu(p->key)); - if (!bh) + bh = sb_getblk(sb, le32_to_cpu(p->key)); + if (unlikely(!bh)) goto failure; + + if (!bh_uptodate_or_lock(bh)) { + if (bh_submit_read(bh) < 0) { + put_bh(bh); + goto failure; + } + /* validate block references */ + if (ext4_check_indirect_blockref(inode, bh)) { + put_bh(bh); + goto failure; + } + } + add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); /* Reader: end */ if (!p->key) @@ -459,6 +500,8 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) ext4_fsblk_t bg_start; ext4_fsblk_t last_block; ext4_grpblk_t colour; + ext4_group_t block_group; + int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); /* Try to find previous block */ for (p = ind->p - 1; p >= start; p--) { @@ -474,9 +517,22 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) * It is going to be referred to from the inode itself? OK, just put it * into the same cylinder group then. */ - bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group); + block_group = ei->i_block_group; + if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { + block_group &= ~(flex_size-1); + if (S_ISREG(inode->i_mode)) + block_group++; + } + bg_start = ext4_group_first_block_no(inode->i_sb, block_group); last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; + /* + * If we are doing delayed allocation, we don't need take + * colour into account. + */ + if (test_opt(inode->i_sb, DELALLOC)) + return bg_start; + if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) colour = (current->pid % 16) * (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); @@ -1052,9 +1108,16 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) /* * free those over-booking quota for metadata blocks */ - if (mdb_free) vfs_dq_release_reservation_block(inode, mdb_free); + + /* + * If we have done all the pending block allocations and if + * there aren't any writers on the inode, we can discard the + * inode's preallocations. + */ + if (!total && (atomic_read(&inode->i_writecount) == 0)) + ext4_discard_preallocations(inode); } /* @@ -1688,9 +1751,10 @@ static void ext4_da_page_release_reservation(struct page *page, struct mpage_da_data { struct inode *inode; - struct buffer_head lbh; /* extent of blocks */ + sector_t b_blocknr; /* start block number of extent */ + size_t b_size; /* size of extent */ + unsigned long b_state; /* state of the extent */ unsigned long first_page, next_page; /* extent of pages */ - get_block_t *get_block; struct writeback_control *wbc; int io_done; int pages_written; @@ -1704,7 +1768,6 @@ struct mpage_da_data { * @mpd->inode: inode * @mpd->first_page: first page of the extent * @mpd->next_page: page after the last page of the extent - * @mpd->get_block: the filesystem's block mapper function * * By the time mpage_da_submit_io() is called we expect all blocks * to be allocated. this may be wrong if allocation failed. @@ -1724,7 +1787,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) /* * We need to start from the first_page to the next_page - 1 * to make sure we also write the mapped dirty buffer_heads. - * If we look at mpd->lbh.b_blocknr we would only be looking + * If we look at mpd->b_blocknr we would only be looking * at the currently mapped buffer_heads. */ index = mpd->first_page; @@ -1914,68 +1977,111 @@ static void ext4_print_free_blocks(struct inode *inode) return; } +#define EXT4_DELALLOC_RSVED 1 +static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + int ret; + unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; + loff_t disksize = EXT4_I(inode)->i_disksize; + handle_t *handle = NULL; + + handle = ext4_journal_current_handle(); + BUG_ON(!handle); + ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, + bh_result, create, 0, EXT4_DELALLOC_RSVED); + if (ret <= 0) + return ret; + + bh_result->b_size = (ret << inode->i_blkbits); + + if (ext4_should_order_data(inode)) { + int retval; + retval = ext4_jbd2_file_inode(handle, inode); + if (retval) + /* + * Failed to add inode for ordered mode. Don't + * update file size + */ + return retval; + } + + /* + * Update on-disk size along with block allocation we don't + * use 'extend_disksize' as size may change within already + * allocated block -bzzz + */ + disksize = ((loff_t) iblock + ret) << inode->i_blkbits; + if (disksize > i_size_read(inode)) + disksize = i_size_read(inode); + if (disksize > EXT4_I(inode)->i_disksize) { + ext4_update_i_disksize(inode, disksize); + ret = ext4_mark_inode_dirty(handle, inode); + return ret; + } + return 0; +} + /* * mpage_da_map_blocks - go through given space * - * @mpd->lbh - bh describing space - * @mpd->get_block - the filesystem's block mapper function + * @mpd - bh describing space * * The function skips space we know is already mapped to disk blocks. * */ -static int mpage_da_map_blocks(struct mpage_da_data *mpd) +static int mpage_da_map_blocks(struct mpage_da_data *mpd) { int err = 0; struct buffer_head new; - struct buffer_head *lbh = &mpd->lbh; sector_t next; /* * We consider only non-mapped and non-allocated blocks */ - if (buffer_mapped(lbh) && !buffer_delay(lbh)) + if ((mpd->b_state & (1 << BH_Mapped)) && + !(mpd->b_state & (1 << BH_Delay))) return 0; - new.b_state = lbh->b_state; + new.b_state = mpd->b_state; new.b_blocknr = 0; - new.b_size = lbh->b_size; - next = lbh->b_blocknr; + new.b_size = mpd->b_size; + next = mpd->b_blocknr; /* * If we didn't accumulate anything * to write simply return */ if (!new.b_size) return 0; - err = mpd->get_block(mpd->inode, next, &new, 1); - if (err) { - /* If get block returns with error - * we simply return. Later writepage - * will redirty the page and writepages - * will find the dirty page again + err = ext4_da_get_block_write(mpd->inode, next, &new, 1); + if (err) { + /* + * If get block returns with error we simply + * return. Later writepage will redirty the page and + * writepages will find the dirty page again */ if (err == -EAGAIN) return 0; if (err == -ENOSPC && - ext4_count_free_blocks(mpd->inode->i_sb)) { + ext4_count_free_blocks(mpd->inode->i_sb)) { mpd->retval = err; return 0; } /* - * get block failure will cause us - * to loop in writepages. Because - * a_ops->writepage won't be able to - * make progress. The page will be redirtied - * by writepage and writepages will again - * try to write the same. + * get block failure will cause us to loop in + * writepages, because a_ops->writepage won't be able + * to make progress. The page will be redirtied by + * writepage and writepages will again try to write + * the same. */ printk(KERN_EMERG "%s block allocation failed for inode %lu " "at logical offset %llu with max blocks " "%zd with error %d\n", __func__, mpd->inode->i_ino, (unsigned long long)next, - lbh->b_size >> mpd->inode->i_blkbits, err); + mpd->b_size >> mpd->inode->i_blkbits, err); printk(KERN_EMERG "This should not happen.!! " "Data will be lost\n"); if (err == -ENOSPC) { @@ -1983,7 +2089,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) } /* invlaidate all the pages */ ext4_da_block_invalidatepages(mpd, next, - lbh->b_size >> mpd->inode->i_blkbits); + mpd->b_size >> mpd->inode->i_blkbits); return err; } BUG_ON(new.b_size == 0); @@ -1995,7 +2101,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) * If blocks are delayed marked, we need to * put actual blocknr and drop delayed bit */ - if (buffer_delay(lbh) || buffer_unwritten(lbh)) + if ((mpd->b_state & (1 << BH_Delay)) || + (mpd->b_state & (1 << BH_Unwritten))) mpage_put_bnr_to_bhs(mpd, next, &new); return 0; @@ -2014,12 +2121,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) * the function is used to collect contig. blocks in same state */ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, - sector_t logical, struct buffer_head *bh) + sector_t logical, size_t b_size, + unsigned long b_state) { sector_t next; - size_t b_size = bh->b_size; - struct buffer_head *lbh = &mpd->lbh; - int nrblocks = lbh->b_size >> mpd->inode->i_blkbits; + int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; /* check if thereserved journal credits might overflow */ if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { @@ -2046,19 +2152,19 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, /* * First block in the extent */ - if (lbh->b_size == 0) { - lbh->b_blocknr = logical; - lbh->b_size = b_size; - lbh->b_state = bh->b_state & BH_FLAGS; + if (mpd->b_size == 0) { + mpd->b_blocknr = logical; + mpd->b_size = b_size; + mpd->b_state = b_state & BH_FLAGS; return; } - next = lbh->b_blocknr + nrblocks; + next = mpd->b_blocknr + nrblocks; /* * Can we merge the block to our big extent? */ - if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { - lbh->b_size += b_size; + if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) { + mpd->b_size += b_size; return; } @@ -2087,7 +2193,7 @@ static int __mpage_da_writepage(struct page *page, { struct mpage_da_data *mpd = data; struct inode *inode = mpd->inode; - struct buffer_head *bh, *head, fake; + struct buffer_head *bh, *head; sector_t logical; if (mpd->io_done) { @@ -2129,9 +2235,9 @@ static int __mpage_da_writepage(struct page *page, /* * ... and blocks */ - mpd->lbh.b_size = 0; - mpd->lbh.b_state = 0; - mpd->lbh.b_blocknr = 0; + mpd->b_size = 0; + mpd->b_state = 0; + mpd->b_blocknr = 0; } mpd->next_page = page->index + 1; @@ -2139,16 +2245,8 @@ static int __mpage_da_writepage(struct page *page, (PAGE_CACHE_SHIFT - inode->i_blkbits); if (!page_has_buffers(page)) { - /* - * There is no attached buffer heads yet (mmap?) - * we treat the page asfull of dirty blocks - */ - bh = &fake; - bh->b_size = PAGE_CACHE_SIZE; - bh->b_state = 0; - set_buffer_dirty(bh); - set_buffer_uptodate(bh); - mpage_add_bh_to_extent(mpd, logical, bh); + mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE, + (1 << BH_Dirty) | (1 << BH_Uptodate)); if (mpd->io_done) return MPAGE_DA_EXTENT_TAIL; } else { @@ -2166,8 +2264,10 @@ static int __mpage_da_writepage(struct page *page, * with the page in ext4_da_writepage */ if (buffer_dirty(bh) && - (!buffer_mapped(bh) || buffer_delay(bh))) { - mpage_add_bh_to_extent(mpd, logical, bh); + (!buffer_mapped(bh) || buffer_delay(bh))) { + mpage_add_bh_to_extent(mpd, logical, + bh->b_size, + bh->b_state); if (mpd->io_done) return MPAGE_DA_EXTENT_TAIL; } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { @@ -2179,9 +2279,8 @@ static int __mpage_da_writepage(struct page *page, * unmapped buffer_head later we need to * use the b_state flag of that buffer_head. */ - if (mpd->lbh.b_size == 0) - mpd->lbh.b_state = - bh->b_state & BH_FLAGS; + if (mpd->b_size == 0) + mpd->b_state = bh->b_state & BH_FLAGS; } logical++; } while ((bh = bh->b_this_page) != head); @@ -2191,51 +2290,6 @@ static int __mpage_da_writepage(struct page *page, } /* - * mpage_da_writepages - walk the list of dirty pages of the given - * address space, allocates non-allocated blocks, maps newly-allocated - * blocks to existing bhs and issue IO them - * - * @mapping: address space structure to write - * @wbc: subtract the number of written pages from *@wbc->nr_to_write - * @get_block: the filesystem's block mapper function. - * - * This is a library function, which implements the writepages() - * address_space_operation. - */ -static int mpage_da_writepages(struct address_space *mapping, - struct writeback_control *wbc, - struct mpage_da_data *mpd) -{ - int ret; - - if (!mpd->get_block) - return generic_writepages(mapping, wbc); - - mpd->lbh.b_size = 0; - mpd->lbh.b_state = 0; - mpd->lbh.b_blocknr = 0; - mpd->first_page = 0; - mpd->next_page = 0; - mpd->io_done = 0; - mpd->pages_written = 0; - mpd->retval = 0; - - ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); - /* - * Handle last extent of pages - */ - if (!mpd->io_done && mpd->next_page != mpd->first_page) { - if (mpage_da_map_blocks(mpd) == 0) - mpage_da_submit_io(mpd); - - mpd->io_done = 1; - ret = MPAGE_DA_EXTENT_TAIL; - } - wbc->nr_to_write -= mpd->pages_written; - return ret; -} - -/* * this is a special callback for ->write_begin() only * it's intention is to return mapped block or reserve space */ @@ -2274,51 +2328,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, return ret; } -#define EXT4_DELALLOC_RSVED 1 -static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - int ret; - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - loff_t disksize = EXT4_I(inode)->i_disksize; - handle_t *handle = NULL; - - handle = ext4_journal_current_handle(); - BUG_ON(!handle); - ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, - bh_result, create, 0, EXT4_DELALLOC_RSVED); - if (ret > 0) { - - bh_result->b_size = (ret << inode->i_blkbits); - - if (ext4_should_order_data(inode)) { - int retval; - retval = ext4_jbd2_file_inode(handle, inode); - if (retval) - /* - * Failed to add inode for ordered - * mode. Don't update file size - */ - return retval; - } - - /* - * Update on-disk size along with block allocation - * we don't use 'extend_disksize' as size may change - * within already allocated block -bzzz - */ - disksize = ((loff_t) iblock + ret) << inode->i_blkbits; - if (disksize > i_size_read(inode)) - disksize = i_size_read(inode); - if (disksize > EXT4_I(inode)->i_disksize) { - ext4_update_i_disksize(inode, disksize); - ret = ext4_mark_inode_dirty(handle, inode); - return ret; - } - ret = 0; - } - return ret; -} static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) { @@ -2569,8 +2578,38 @@ retry: dump_stack(); goto out_writepages; } - mpd.get_block = ext4_da_get_block_write; - ret = mpage_da_writepages(mapping, wbc, &mpd); + + /* + * Now call __mpage_da_writepage to find the next + * contiguous region of logical blocks that need + * blocks to be allocated by ext4. We don't actually + * submit the blocks for I/O here, even though + * write_cache_pages thinks it will, and will set the + * pages as clean for write before calling + * __mpage_da_writepage(). + */ + mpd.b_size = 0; + mpd.b_state = 0; + mpd.b_blocknr = 0; + mpd.first_page = 0; + mpd.next_page = 0; + mpd.io_done = 0; + mpd.pages_written = 0; + mpd.retval = 0; + ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, + &mpd); + /* + * If we have a contigous extent of pages and we + * haven't done the I/O yet, map the blocks and submit + * them for I/O. + */ + if (!mpd.io_done && mpd.next_page != mpd.first_page) { + if (mpage_da_map_blocks(&mpd) == 0) + mpage_da_submit_io(&mpd); + mpd.io_done = 1; + ret = MPAGE_DA_EXTENT_TAIL; + } + wbc->nr_to_write -= mpd.pages_written; ext4_journal_stop(handle); @@ -2846,6 +2885,48 @@ out: return; } +/* + * Force all delayed allocation blocks to be allocated for a given inode. + */ +int ext4_alloc_da_blocks(struct inode *inode) +{ + if (!EXT4_I(inode)->i_reserved_data_blocks && + !EXT4_I(inode)->i_reserved_meta_blocks) + return 0; + + /* + * We do something simple for now. The filemap_flush() will + * also start triggering a write of the data blocks, which is + * not strictly speaking necessary (and for users of + * laptop_mode, not even desirable). However, to do otherwise + * would require replicating code paths in: + * + * ext4_da_writepages() -> + * write_cache_pages() ---> (via passed in callback function) + * __mpage_da_writepage() --> + * mpage_add_bh_to_extent() + * mpage_da_map_blocks() + * + * The problem is that write_cache_pages(), located in + * mm/page-writeback.c, marks pages clean in preparation for + * doing I/O, which is not desirable if we're not planning on + * doing I/O at all. + * + * We could call write_cache_pages(), and then redirty all of + * the pages by calling redirty_page_for_writeback() but that + * would be ugly in the extreme. So instead we would need to + * replicate parts of the code in the above functions, + * simplifying them becuase we wouldn't actually intend to + * write out the pages, but rather only collect contiguous + * logical block extents, call the multi-block allocator, and + * then update the buffer heads with the block allocations. + * + * For now, though, we'll cheat by calling filemap_flush(), + * which will map the blocks, and start the I/O, but not + * actually wait for the I/O to complete. + */ + return filemap_flush(inode->i_mapping); +} /* * bmap() is special. It gets used by applications such as lilo and by @@ -3868,6 +3949,9 @@ void ext4_truncate(struct inode *inode) if (!ext4_can_truncate(inode)) return; + if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) + ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { ext4_ext_truncate(inode); return; @@ -4110,12 +4194,7 @@ make_io: unsigned num; table = ext4_inode_table(sb, gdp); - /* Make sure s_inode_readahead_blks is a power of 2 */ - while (EXT4_SB(sb)->s_inode_readahead_blks & - (EXT4_SB(sb)->s_inode_readahead_blks-1)) - EXT4_SB(sb)->s_inode_readahead_blks = - (EXT4_SB(sb)->s_inode_readahead_blks & - (EXT4_SB(sb)->s_inode_readahead_blks-1)); + /* s_inode_readahead_blks is always a power of 2 */ b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1); if (table > b) b = table; @@ -4287,6 +4366,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_disksize = inode->i_size; inode->i_generation = le32_to_cpu(raw_inode->i_generation); ei->i_block_group = iloc.block_group; + ei->i_last_alloc_group = ~0; /* * NOTE! The in-memory inode i_data array is in little-endian order * even on big-endian machines: we do NOT byteswap the block numbers! @@ -4329,6 +4409,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; } + if (ei->i_flags & EXT4_EXTENTS_FL) { + /* Validate extent which is part of inode */ + ret = ext4_ext_check_inode(inode); + } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + (S_ISLNK(inode->i_mode) && + !ext4_inode_is_fast_symlink(inode))) { + /* Validate block references which are part of inode */ + ret = ext4_check_inode_blockref(inode); + } + if (ret) { + brelse(bh); + goto bad_inode; + } + if (S_ISREG(inode->i_mode)) { inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; @@ -4345,7 +4439,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) inode->i_op = &ext4_symlink_inode_operations; ext4_set_aops(inode); } - } else { + } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || + S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { inode->i_op = &ext4_special_inode_operations; if (raw_inode->i_block[0]) init_special_inode(inode, inode->i_mode, @@ -4353,6 +4448,13 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) else init_special_inode(inode, inode->i_mode, new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); + } else { + brelse(bh); + ret = -EIO; + ext4_error(inode->i_sb, __func__, + "bogus i_mode (%o) for inode=%lu", + inode->i_mode, inode->i_ino); + goto bad_inode; } brelse(iloc.bh); ext4_set_inode_flags(inode); @@ -5146,8 +5248,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) return !buffer_mapped(bh); } -int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) +int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; loff_t size; unsigned long len; int ret = -EINVAL; @@ -5199,6 +5302,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) goto out_unlock; ret = 0; out_unlock: + if (ret) + ret = VM_FAULT_SIGBUS; up_read(&inode->i_alloc_sem); return ret; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 42dc83fb247a..91e75f7a9e73 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -48,8 +48,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (err) return err; - if (!S_ISDIR(inode->i_mode)) - flags &= ~EXT4_DIRSYNC_FL; + flags = ext4_mask_flags(inode->i_mode, flags); err = -EPERM; mutex_lock(&inode->i_mutex); @@ -263,6 +262,20 @@ setversion_out: return err; } + case EXT4_IOC_ALLOC_DA_BLKS: + { + int err; + if (!is_owner_or_cap(inode)) + return -EACCES; + + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; + err = ext4_alloc_da_blocks(inode); + mnt_drop_write(filp->f_path.mnt); + return err; + } + default: return -ENOTTY; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b038188bd039..f871677a7984 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -46,22 +46,23 @@ * The allocation request involve request for multiple number of blocks * near to the goal(block) value specified. * - * During initialization phase of the allocator we decide to use the group - * preallocation or inode preallocation depending on the size file. The - * size of the file could be the resulting file size we would have after - * allocation or the current file size which ever is larger. If the size is - * less that sbi->s_mb_stream_request we select the group - * preallocation. The default value of s_mb_stream_request is 16 - * blocks. This can also be tuned via - * /proc/fs/ext4/<partition>/stream_req. The value is represented in terms - * of number of blocks. + * During initialization phase of the allocator we decide to use the + * group preallocation or inode preallocation depending on the size of + * the file. The size of the file could be the resulting file size we + * would have after allocation, or the current file size, which ever + * is larger. If the size is less than sbi->s_mb_stream_request we + * select to use the group preallocation. The default value of + * s_mb_stream_request is 16 blocks. This can also be tuned via + * /sys/fs/ext4/<partition>/mb_stream_req. The value is represented in + * terms of number of blocks. * * The main motivation for having small file use group preallocation is to - * ensure that we have small file closer in the disk. + * ensure that we have small files closer together on the disk. * - * First stage the allocator looks at the inode prealloc list - * ext4_inode_info->i_prealloc_list contain list of prealloc spaces for - * this particular inode. The inode prealloc space is represented as: + * First stage the allocator looks at the inode prealloc list, + * ext4_inode_info->i_prealloc_list, which contains list of prealloc + * spaces for this particular inode. The inode prealloc space is + * represented as: * * pa_lstart -> the logical start block for this prealloc space * pa_pstart -> the physical start block for this prealloc space @@ -121,29 +122,29 @@ * list. In case of inode preallocation we follow a list of heuristics * based on file size. This can be found in ext4_mb_normalize_request. If * we are doing a group prealloc we try to normalize the request to - * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is set to + * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is * 512 blocks. This can be tuned via - * /proc/fs/ext4/<partition/group_prealloc. The value is represented in + * /sys/fs/ext4/<partition/mb_group_prealloc. The value is represented in * terms of number of blocks. If we have mounted the file system with -O * stripe=<value> option the group prealloc request is normalized to the * stripe value (sbi->s_stripe) * - * The regular allocator(using the buddy cache) support few tunables. + * The regular allocator(using the buddy cache) supports few tunables. * - * /proc/fs/ext4/<partition>/min_to_scan - * /proc/fs/ext4/<partition>/max_to_scan - * /proc/fs/ext4/<partition>/order2_req + * /sys/fs/ext4/<partition>/mb_min_to_scan + * /sys/fs/ext4/<partition>/mb_max_to_scan + * /sys/fs/ext4/<partition>/mb_order2_req * - * The regular allocator use buddy scan only if the request len is power of + * The regular allocator uses buddy scan only if the request len is power of * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The * value of s_mb_order2_reqs can be tuned via - * /proc/fs/ext4/<partition>/order2_req. If the request len is equal to + * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to * stripe size (sbi->s_stripe), we try to search for contigous block in - * stripe size. This should result in better allocation on RAID setup. If - * not we search in the specific group using bitmap for best extents. The - * tunable min_to_scan and max_to_scan controll the behaviour here. + * stripe size. This should result in better allocation on RAID setups. If + * not, we search in the specific group using bitmap for best extents. The + * tunable min_to_scan and max_to_scan control the behaviour here. * min_to_scan indicate how long the mballoc __must__ look for a best - * extent and max_to_scanindicate how long the mballoc __can__ look for a + * extent and max_to_scan indicates how long the mballoc __can__ look for a * best extent in the found extents. Searching for the blocks starts with * the group specified as the goal value in allocation context via * ac_g_ex. Each group is first checked based on the criteria whether it @@ -337,8 +338,6 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group); static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, ext4_group_t group); -static int ext4_mb_init_per_dev_proc(struct super_block *sb); -static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); @@ -1726,6 +1725,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, { unsigned free, fragments; unsigned i, bits; + int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); struct ext4_group_desc *desc; struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); @@ -1747,6 +1747,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) return 0; + /* Avoid using the first bg of a flexgroup for data files */ + if ((ac->ac_flags & EXT4_MB_HINT_DATA) && + (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && + ((group % flex_size) == 0)) + return 0; + bits = ac->ac_sb->s_blocksize_bits + 1; for (i = ac->ac_2order; i <= bits; i++) if (grp->bb_counters[i] > 0) @@ -1971,7 +1977,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) /* * We search using buddy data only if the order of the request * is greater than equal to the sbi_s_mb_order2_reqs - * You can tune it via /proc/fs/ext4/<partition>/order2_req + * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req */ if (i >= sbi->s_mb_order2_reqs) { /* @@ -2693,7 +2699,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int); sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); if (sbi->s_mb_maxs == NULL) { - kfree(sbi->s_mb_maxs); + kfree(sbi->s_mb_offsets); return -ENOMEM; } @@ -2746,7 +2752,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) spin_lock_init(&lg->lg_prealloc_lock); } - ext4_mb_init_per_dev_proc(sb); ext4_mb_history_init(sb); if (sbi->s_journal) @@ -2829,7 +2834,6 @@ int ext4_mb_release(struct super_block *sb) free_percpu(sbi->s_locality_groups); ext4_mb_history_release(sb); - ext4_mb_destroy_per_dev_proc(sb); return 0; } @@ -2890,62 +2894,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) mb_debug("freed %u blocks in %u structures\n", count, count2); } -#define EXT4_MB_STATS_NAME "stats" -#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan" -#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan" -#define EXT4_MB_ORDER2_REQ "order2_req" -#define EXT4_MB_STREAM_REQ "stream_req" -#define EXT4_MB_GROUP_PREALLOC "group_prealloc" - -static int ext4_mb_init_per_dev_proc(struct super_block *sb) -{ -#ifdef CONFIG_PROC_FS - mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct proc_dir_entry *proc; - - if (sbi->s_proc == NULL) - return -EINVAL; - - EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats); - EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan); - EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan); - EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs); - EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request); - EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc); - return 0; - -err_out: - remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); - remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); - remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); - remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); - remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); - remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); - return -ENOMEM; -#else - return 0; -#endif -} - -static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) -{ -#ifdef CONFIG_PROC_FS - struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (sbi->s_proc == NULL) - return -EINVAL; - - remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); - remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); - remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); - remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); - remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); - remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); -#endif - return 0; -} - int __init init_ext4_mballoc(void) { ext4_pspace_cachep = @@ -3096,9 +3044,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); - spin_lock(sb_bgl_lock(sbi, flex_group)); - sbi->s_flex_groups[flex_group].free_blocks -= ac->ac_b_ex.fe_len; - spin_unlock(sb_bgl_lock(sbi, flex_group)); + atomic_sub(ac->ac_b_ex.fe_len, + &sbi->s_flex_groups[flex_group].free_blocks); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -3116,7 +3063,7 @@ out_err: * here we normalize request for locality group * Group request are normalized to s_strip size if we set the same via mount * option. If not we set it to s_mb_group_prealloc which can be configured via - * /proc/fs/ext4/<partition>/group_prealloc + * /sys/fs/ext4/<partition>/mb_group_prealloc * * XXX: should we try to preallocate more than the group has now? */ @@ -3608,8 +3555,11 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, spin_unlock(&pa->pa_lock); grp_blk = pa->pa_pstart; - /* If linear, pa_pstart may be in the next group when pa is used up */ - if (pa->pa_linear) + /* + * If doing group-based preallocation, pa_pstart may be in the + * next group when pa is used up + */ + if (pa->pa_type == MB_GROUP_PA) grp_blk--; ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL); @@ -3704,7 +3654,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) INIT_LIST_HEAD(&pa->pa_inode_list); INIT_LIST_HEAD(&pa->pa_group_list); pa->pa_deleted = 0; - pa->pa_linear = 0; + pa->pa_type = MB_INODE_PA; mb_debug("new inode pa %p: %llu/%u for %u\n", pa, pa->pa_pstart, pa->pa_len, pa->pa_lstart); @@ -3767,7 +3717,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) INIT_LIST_HEAD(&pa->pa_inode_list); INIT_LIST_HEAD(&pa->pa_group_list); pa->pa_deleted = 0; - pa->pa_linear = 1; + pa->pa_type = MB_GROUP_PA; mb_debug("new group pa %p: %llu/%u for %u\n", pa, pa->pa_pstart, pa->pa_len, pa->pa_lstart); @@ -4021,7 +3971,7 @@ repeat: list_del_rcu(&pa->pa_inode_list); spin_unlock(pa->pa_obj_lock); - if (pa->pa_linear) + if (pa->pa_type == MB_GROUP_PA) ext4_mb_release_group_pa(&e4b, pa, ac); else ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); @@ -4121,7 +4071,7 @@ repeat: spin_unlock(&ei->i_prealloc_lock); list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) { - BUG_ON(pa->pa_linear != 0); + BUG_ON(pa->pa_type != MB_INODE_PA); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); err = ext4_mb_load_buddy(sb, group, &e4b); @@ -4232,7 +4182,7 @@ static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac) * file is determined by the current size or the resulting size after * allocation which ever is larger * - * One can tune this size via /proc/fs/ext4/<partition>/stream_req + * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req */ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) { @@ -4373,7 +4323,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, continue; } /* only lg prealloc space */ - BUG_ON(!pa->pa_linear); + BUG_ON(pa->pa_type != MB_GROUP_PA); /* seems this one can be freed ... */ pa->pa_deleted = 1; @@ -4442,7 +4392,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) pa_inode_list) { spin_lock(&tmp_pa->pa_lock); if (tmp_pa->pa_deleted) { - spin_unlock(&pa->pa_lock); + spin_unlock(&tmp_pa->pa_lock); continue; } if (!added && pa->pa_free < tmp_pa->pa_free) { @@ -4479,7 +4429,7 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) { struct ext4_prealloc_space *pa = ac->ac_pa; if (pa) { - if (pa->pa_linear) { + if (pa->pa_type == MB_GROUP_PA) { /* see comment in ext4_mb_use_group_pa() */ spin_lock(&pa->pa_lock); pa->pa_pstart += ac->ac_b_ex.fe_len; @@ -4499,7 +4449,7 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) * doesn't grow big. We need to release * alloc_semp before calling ext4_mb_add_n_trim() */ - if (pa->pa_linear && likely(pa->pa_free)) { + if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) { spin_lock(pa->pa_obj_lock); list_del_rcu(&pa->pa_inode_list); spin_unlock(pa->pa_obj_lock); @@ -4936,9 +4886,7 @@ do_more: if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - spin_lock(sb_bgl_lock(sbi, flex_group)); - sbi->s_flex_groups[flex_group].free_blocks += count; - spin_unlock(sb_bgl_lock(sbi, flex_group)); + atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks); } ext4_mb_release_desc(&e4b); diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 10a2921baf14..dd9e6cd5f6cf 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -132,12 +132,15 @@ struct ext4_prealloc_space { ext4_lblk_t pa_lstart; /* log. block */ unsigned short pa_len; /* len of preallocated chunk */ unsigned short pa_free; /* how many blocks are free */ - unsigned short pa_linear; /* consumed in one direction - * strictly, for grp prealloc */ + unsigned short pa_type; /* pa type. inode or group */ spinlock_t *pa_obj_lock; struct inode *pa_inode; /* hack, for history only */ }; +enum { + MB_INODE_PA = 0, + MB_GROUP_PA = 1 +}; struct ext4_free_extent { ext4_lblk_t fe_logical; @@ -247,7 +250,6 @@ static inline void ext4_mb_store_history(struct ext4_allocation_context *ac) #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) -struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, struct ext4_free_extent *fex) { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 83410244d3ee..22098e1cd085 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -161,12 +161,12 @@ static struct dx_frame *dx_probe(const struct qstr *d_name, struct dx_frame *frame, int *err); static void dx_release(struct dx_frame *frames); -static int dx_make_map(struct ext4_dir_entry_2 *de, int size, +static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, struct dx_hash_info *hinfo, struct dx_map_entry map[]); static void dx_sort_map(struct dx_map_entry *map, unsigned count); static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, - struct dx_map_entry *offsets, int count); -static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size); + struct dx_map_entry *offsets, int count, unsigned blocksize); +static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize); static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block); static int ext4_htree_next_block(struct inode *dir, __u32 hash, @@ -180,14 +180,38 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, struct inode *inode); +unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) +{ + unsigned len = le16_to_cpu(dlen); + + if (len == EXT4_MAX_REC_LEN || len == 0) + return blocksize; + return (len & 65532) | ((len & 3) << 16); +} + +__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) +{ + if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) + BUG(); + if (len < 65536) + return cpu_to_le16(len); + if (len == blocksize) { + if (blocksize == 65536) + return cpu_to_le16(EXT4_MAX_REC_LEN); + else + return cpu_to_le16(0); + } + return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); +} + /* * p is at least 6 bytes before the end of page */ static inline struct ext4_dir_entry_2 * -ext4_next_entry(struct ext4_dir_entry_2 *p) +ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize) { return (struct ext4_dir_entry_2 *)((char *)p + - ext4_rec_len_from_disk(p->rec_len)); + ext4_rec_len_from_disk(p->rec_len, blocksize)); } /* @@ -294,7 +318,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent space += EXT4_DIR_REC_LEN(de->name_len); names++; } - de = ext4_next_entry(de); + de = ext4_next_entry(de, size); } printk("(%i)\n", names); return (struct stats) { names, space, 1 }; @@ -585,7 +609,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, top = (struct ext4_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0)); - for (; de < top; de = ext4_next_entry(de)) { + for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) +((char *)de - bh->b_data))) { @@ -663,7 +687,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, } if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) { de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; - de = ext4_next_entry(de); + de = ext4_next_entry(de, dir->i_sb->s_blocksize); if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0) goto errout; count++; @@ -713,15 +737,15 @@ errout: * Create map of hash values, offsets, and sizes, stored at end of block. * Returns number of entries mapped. */ -static int dx_make_map (struct ext4_dir_entry_2 *de, int size, - struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) +static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, + struct dx_hash_info *hinfo, + struct dx_map_entry *map_tail) { int count = 0; char *base = (char *) de; struct dx_hash_info h = *hinfo; - while ((char *) de < base + size) - { + while ((char *) de < base + blocksize) { if (de->name_len && de->inode) { ext4fs_dirhash(de->name, de->name_len, &h); map_tail--; @@ -732,7 +756,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size, cond_resched(); } /* XXX: do we need to check rec_len == 0 case? -Chris */ - de = ext4_next_entry(de); + de = ext4_next_entry(de, blocksize); } return count; } @@ -832,7 +856,8 @@ static inline int search_dirblock(struct buffer_head *bh, return 1; } /* prevent looping on a bad block */ - de_len = ext4_rec_len_from_disk(de->rec_len); + de_len = ext4_rec_len_from_disk(de->rec_len, + dir->i_sb->s_blocksize); if (de_len <= 0) return -1; offset += de_len; @@ -996,7 +1021,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q de = (struct ext4_dir_entry_2 *) bh->b_data; top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - EXT4_DIR_REC_LEN(0)); - for (; de < top; de = ext4_next_entry(de)) { + for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) { int off = (block << EXT4_BLOCK_SIZE_BITS(sb)) + ((char *) de - bh->b_data); @@ -1052,8 +1077,16 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru return ERR_PTR(-EIO); } inode = ext4_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); + if (unlikely(IS_ERR(inode))) { + if (PTR_ERR(inode) == -ESTALE) { + ext4_error(dir->i_sb, __func__, + "deleted inode referenced: %u", + ino); + return ERR_PTR(-EIO); + } else { + return ERR_CAST(inode); + } + } } return d_splice_alias(inode, dentry); } @@ -1109,7 +1142,8 @@ static inline void ext4_set_de_type(struct super_block *sb, * Returns pointer to last entry moved. */ static struct ext4_dir_entry_2 * -dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) +dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, + unsigned blocksize) { unsigned rec_len = 0; @@ -1118,7 +1152,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) rec_len = EXT4_DIR_REC_LEN(de->name_len); memcpy (to, de, rec_len); ((struct ext4_dir_entry_2 *) to)->rec_len = - ext4_rec_len_to_disk(rec_len); + ext4_rec_len_to_disk(rec_len, blocksize); de->inode = 0; map++; to += rec_len; @@ -1130,19 +1164,19 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) * Compact each dir entry in the range to the minimal rec_len. * Returns pointer to last entry in range. */ -static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) +static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize) { struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base; unsigned rec_len = 0; prev = to = de; - while ((char*)de < base + size) { - next = ext4_next_entry(de); + while ((char*)de < base + blocksize) { + next = ext4_next_entry(de, blocksize); if (de->inode && de->name_len) { rec_len = EXT4_DIR_REC_LEN(de->name_len); if (de > to) memmove(to, de, rec_len); - to->rec_len = ext4_rec_len_to_disk(rec_len); + to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize); prev = to; to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len); } @@ -1215,10 +1249,12 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, hash2, split, count-split)); /* Fancy dance to stay within two buffers */ - de2 = dx_move_dirents(data1, data2, map + split, count - split); + de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize); de = dx_pack_dirents(data1, blocksize); - de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); - de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); + de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de, + blocksize); + de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2, + blocksize); dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); @@ -1268,6 +1304,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; unsigned int offset = 0; + unsigned int blocksize = dir->i_sb->s_blocksize; unsigned short reclen; int nlen, rlen, err; char *top; @@ -1275,7 +1312,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, reclen = EXT4_DIR_REC_LEN(namelen); if (!de) { de = (struct ext4_dir_entry_2 *)bh->b_data; - top = bh->b_data + dir->i_sb->s_blocksize - reclen; + top = bh->b_data + blocksize - reclen; while ((char *) de <= top) { if (!ext4_check_dir_entry("ext4_add_entry", dir, de, bh, offset)) { @@ -1287,7 +1324,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, return -EEXIST; } nlen = EXT4_DIR_REC_LEN(de->name_len); - rlen = ext4_rec_len_from_disk(de->rec_len); + rlen = ext4_rec_len_from_disk(de->rec_len, blocksize); if ((de->inode? rlen - nlen: rlen) >= reclen) break; de = (struct ext4_dir_entry_2 *)((char *)de + rlen); @@ -1306,11 +1343,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, /* By now the buffer is marked for journaling */ nlen = EXT4_DIR_REC_LEN(de->name_len); - rlen = ext4_rec_len_from_disk(de->rec_len); + rlen = ext4_rec_len_from_disk(de->rec_len, blocksize); if (de->inode) { struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen); - de1->rec_len = ext4_rec_len_to_disk(rlen - nlen); - de->rec_len = ext4_rec_len_to_disk(nlen); + de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, blocksize); + de->rec_len = ext4_rec_len_to_disk(nlen, blocksize); de = de1; } de->file_type = EXT4_FT_UNKNOWN; @@ -1380,7 +1417,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, /* The 0th block becomes the root, move the dirents out */ fde = &root->dotdot; de = (struct ext4_dir_entry_2 *)((char *)fde + - ext4_rec_len_from_disk(fde->rec_len)); + ext4_rec_len_from_disk(fde->rec_len, blocksize)); if ((char *) de >= (((char *) root) + blocksize)) { ext4_error(dir->i_sb, __func__, "invalid rec_len for '..' in inode %lu", @@ -1402,12 +1439,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, memcpy (data1, de, len); de = (struct ext4_dir_entry_2 *) data1; top = data1 + len; - while ((char *)(de2 = ext4_next_entry(de)) < top) + while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) de = de2; - de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); + de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de, + blocksize); /* Initialize the root; the dot dirents already exist */ de = (struct ext4_dir_entry_2 *) (&root->dotdot); - de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2)); + de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2), + blocksize); memset (&root->info, 0, sizeof(root->info)); root->info.info_length = sizeof(root->info); root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; @@ -1488,7 +1527,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, return retval; de = (struct ext4_dir_entry_2 *) bh->b_data; de->inode = 0; - de->rec_len = ext4_rec_len_to_disk(blocksize); + de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); return add_dirent_to_buf(handle, dentry, inode, de, bh); } @@ -1551,7 +1590,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, goto cleanup; node2 = (struct dx_node *)(bh2->b_data); entries2 = node2->entries; - node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize); + node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize, + sb->s_blocksize); node2->fake.inode = 0; BUFFER_TRACE(frame->bh, "get_write_access"); err = ext4_journal_get_write_access(handle, frame->bh); @@ -1639,6 +1679,7 @@ static int ext4_delete_entry(handle_t *handle, struct buffer_head *bh) { struct ext4_dir_entry_2 *de, *pde; + unsigned int blocksize = dir->i_sb->s_blocksize; int i; i = 0; @@ -1652,8 +1693,11 @@ static int ext4_delete_entry(handle_t *handle, ext4_journal_get_write_access(handle, bh); if (pde) pde->rec_len = ext4_rec_len_to_disk( - ext4_rec_len_from_disk(pde->rec_len) + - ext4_rec_len_from_disk(de->rec_len)); + ext4_rec_len_from_disk(pde->rec_len, + blocksize) + + ext4_rec_len_from_disk(de->rec_len, + blocksize), + blocksize); else de->inode = 0; dir->i_version++; @@ -1661,9 +1705,9 @@ static int ext4_delete_entry(handle_t *handle, ext4_handle_dirty_metadata(handle, dir, bh); return 0; } - i += ext4_rec_len_from_disk(de->rec_len); + i += ext4_rec_len_from_disk(de->rec_len, blocksize); pde = de; - de = ext4_next_entry(de); + de = ext4_next_entry(de, blocksize); } return -ENOENT; } @@ -1793,6 +1837,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct inode *inode; struct buffer_head *dir_block; struct ext4_dir_entry_2 *de; + unsigned int blocksize = dir->i_sb->s_blocksize; int err, retries = 0; if (EXT4_DIR_LINK_MAX(dir)) @@ -1824,13 +1869,14 @@ retry: de = (struct ext4_dir_entry_2 *) dir_block->b_data; de->inode = cpu_to_le32(inode->i_ino); de->name_len = 1; - de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); + de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len), + blocksize); strcpy(de->name, "."); ext4_set_de_type(dir->i_sb, de, S_IFDIR); - de = ext4_next_entry(de); + de = ext4_next_entry(de, blocksize); de->inode = cpu_to_le32(dir->i_ino); - de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - - EXT4_DIR_REC_LEN(1)); + de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1), + blocksize); de->name_len = 2; strcpy(de->name, ".."); ext4_set_de_type(dir->i_sb, de, S_IFDIR); @@ -1885,7 +1931,7 @@ static int empty_dir(struct inode *inode) return 1; } de = (struct ext4_dir_entry_2 *) bh->b_data; - de1 = ext4_next_entry(de); + de1 = ext4_next_entry(de, sb->s_blocksize); if (le32_to_cpu(de->inode) != inode->i_ino || !le32_to_cpu(de1->inode) || strcmp(".", de->name) || @@ -1896,9 +1942,9 @@ static int empty_dir(struct inode *inode) brelse(bh); return 1; } - offset = ext4_rec_len_from_disk(de->rec_len) + - ext4_rec_len_from_disk(de1->rec_len); - de = ext4_next_entry(de1); + offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) + + ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize); + de = ext4_next_entry(de1, sb->s_blocksize); while (offset < inode->i_size) { if (!bh || (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { @@ -1927,8 +1973,8 @@ static int empty_dir(struct inode *inode) brelse(bh); return 0; } - offset += ext4_rec_len_from_disk(de->rec_len); - de = ext4_next_entry(de); + offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); + de = ext4_next_entry(de, sb->s_blocksize); } brelse(bh); return 1; @@ -2297,8 +2343,8 @@ retry: return err; } -#define PARENT_INO(buffer) \ - (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer))->inode) +#define PARENT_INO(buffer, size) \ + (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer), size)->inode) /* * Anybody can rename anything with this: the permission checks are left to the @@ -2311,7 +2357,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *old_inode, *new_inode; struct buffer_head *old_bh, *new_bh, *dir_bh; struct ext4_dir_entry_2 *old_de, *new_de; - int retval; + int retval, force_da_alloc = 0; old_bh = new_bh = dir_bh = NULL; @@ -2358,7 +2404,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); if (!dir_bh) goto end_rename; - if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) + if (le32_to_cpu(PARENT_INO(dir_bh->b_data, + old_dir->i_sb->s_blocksize)) != old_dir->i_ino) goto end_rename; retval = -EMLINK; if (!new_inode && new_dir != old_dir && @@ -2430,7 +2477,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, if (dir_bh) { BUFFER_TRACE(dir_bh, "get_write_access"); ext4_journal_get_write_access(handle, dir_bh); - PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); + PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = + cpu_to_le32(new_dir->i_ino); BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); ext4_handle_dirty_metadata(handle, old_dir, dir_bh); ext4_dec_count(handle, old_dir); @@ -2449,6 +2497,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, ext4_mark_inode_dirty(handle, new_inode); if (!new_inode->i_nlink) ext4_orphan_add(handle, new_inode); + if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC)) + force_da_alloc = 1; } retval = 0; @@ -2457,6 +2507,8 @@ end_rename: brelse(old_bh); brelse(new_bh); ext4_journal_stop(handle); + if (retval == 0 && force_da_alloc) + ext4_alloc_da_blocks(old_inode); return retval; } diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index c06886abd658..546c7dd869e1 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -938,10 +938,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { ext4_group_t flex_group; flex_group = ext4_flex_group(sbi, input->group); - sbi->s_flex_groups[flex_group].free_blocks += - input->free_blocks_count; - sbi->s_flex_groups[flex_group].free_inodes += - EXT4_INODES_PER_GROUP(sb); + atomic_add(input->free_blocks_count, + &sbi->s_flex_groups[flex_group].free_blocks); + atomic_add(EXT4_INODES_PER_GROUP(sb), + &sbi->s_flex_groups[flex_group].free_inodes); } ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f7371a6a923d..9987bba99db3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -35,6 +35,7 @@ #include <linux/quotaops.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> +#include <linux/ctype.h> #include <linux/marker.h> #include <linux/log2.h> #include <linux/crc16.h> @@ -48,6 +49,7 @@ #include "group.h" struct proc_dir_entry *ext4_proc_root; +static struct kset *ext4_kset; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); @@ -577,9 +579,9 @@ static void ext4_put_super(struct super_block *sb) ext4_commit_super(sb, es, 1); } if (sbi->s_proc) { - remove_proc_entry("inode_readahead_blks", sbi->s_proc); remove_proc_entry(sb->s_id, ext4_proc_root); } + kobject_del(&sbi->s_kobj); for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); @@ -615,6 +617,17 @@ static void ext4_put_super(struct super_block *sb) ext4_blkdev_remove(sbi); } sb->s_fs_info = NULL; + /* + * Now that we are completely done shutting down the + * superblock, we need to actually destroy the kobject. + */ + unlock_kernel(); + unlock_super(sb); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); + lock_super(sb); + lock_kernel(); + kfree(sbi->s_blockgroup_lock); kfree(sbi); return; } @@ -803,8 +816,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) seq_puts(seq, ",noacl"); #endif - if (!test_opt(sb, RESERVATION)) - seq_puts(seq, ",noreservation"); if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { seq_printf(seq, ",commit=%u", (unsigned) (sbi->s_commit_interval / HZ)); @@ -855,6 +866,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) if (test_opt(sb, DATA_ERR_ABORT)) seq_puts(seq, ",data_err=abort"); + if (test_opt(sb, NO_AUTO_DA_ALLOC)) + seq_puts(seq, ",noauto_da_alloc"); + ext4_show_quota_options(seq, sb); return 0; } @@ -1004,7 +1018,7 @@ enum { Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, - Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, + Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_update, Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, @@ -1012,8 +1026,8 @@ enum { Opt_data_err_abort, Opt_data_err_ignore, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_grpquota, Opt_i_version, + Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, + Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_inode_readahead_blks, Opt_journal_ioprio }; @@ -1039,8 +1053,6 @@ static const match_table_t tokens = { {Opt_nouser_xattr, "nouser_xattr"}, {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, - {Opt_reservation, "reservation"}, - {Opt_noreservation, "noreservation"}, {Opt_noload, "noload"}, {Opt_nobh, "nobh"}, {Opt_bh, "bh"}, @@ -1068,6 +1080,8 @@ static const match_table_t tokens = { {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, + {Opt_barrier, "barrier"}, + {Opt_nobarrier, "nobarrier"}, {Opt_i_version, "i_version"}, {Opt_stripe, "stripe=%u"}, {Opt_resize, "resize"}, @@ -1075,6 +1089,9 @@ static const match_table_t tokens = { {Opt_nodelalloc, "nodelalloc"}, {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, {Opt_journal_ioprio, "journal_ioprio=%u"}, + {Opt_auto_da_alloc, "auto_da_alloc=%u"}, + {Opt_auto_da_alloc, "auto_da_alloc"}, + {Opt_noauto_da_alloc, "noauto_da_alloc"}, {Opt_err, NULL}, }; @@ -1207,12 +1224,6 @@ static int parse_options(char *options, struct super_block *sb, "not supported\n"); break; #endif - case Opt_reservation: - set_opt(sbi->s_mount_opt, RESERVATION); - break; - case Opt_noreservation: - clear_opt(sbi->s_mount_opt, RESERVATION); - break; case Opt_journal_update: /* @@@ FIXME */ /* Eventually we will want to be able to create @@ -1415,9 +1426,14 @@ set_qf_format: case Opt_abort: set_opt(sbi->s_mount_opt, ABORT); break; + case Opt_nobarrier: + clear_opt(sbi->s_mount_opt, BARRIER); + break; case Opt_barrier: - if (match_int(&args[0], &option)) - return 0; + if (match_int(&args[0], &option)) { + set_opt(sbi->s_mount_opt, BARRIER); + break; + } if (option) set_opt(sbi->s_mount_opt, BARRIER); else @@ -1463,6 +1479,11 @@ set_qf_format: return 0; if (option < 0 || option > (1 << 30)) return 0; + if (option & (option - 1)) { + printk(KERN_ERR "EXT4-fs: inode_readahead_blks" + " must be a power of 2\n"); + return 0; + } sbi->s_inode_readahead_blks = option; break; case Opt_journal_ioprio: @@ -1473,6 +1494,19 @@ set_qf_format: *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, option); break; + case Opt_noauto_da_alloc: + set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); + break; + case Opt_auto_da_alloc: + if (match_int(&args[0], &option)) { + clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); + break; + } + if (option) + clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); + else + set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); + break; default: printk(KERN_ERR "EXT4-fs: Unrecognized mount option \"%s\" " @@ -1612,10 +1646,12 @@ static int ext4_fill_flex_info(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, &bh); flex_group = ext4_flex_group(sbi, i); - sbi->s_flex_groups[flex_group].free_inodes += - ext4_free_inodes_count(sb, gdp); - sbi->s_flex_groups[flex_group].free_blocks += - ext4_free_blks_count(sb, gdp); + atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, + ext4_free_inodes_count(sb, gdp)); + atomic_set(&sbi->s_flex_groups[flex_group].free_blocks, + ext4_free_blks_count(sb, gdp)); + atomic_set(&sbi->s_flex_groups[flex_group].used_dirs, + ext4_used_dirs_count(sb, gdp)); } return 1; @@ -1991,6 +2027,181 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) return 0; } +/* sysfs supprt */ + +struct ext4_attr { + struct attribute attr; + ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); + ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, + const char *, size_t); + int offset; +}; + +static int parse_strtoul(const char *buf, + unsigned long max, unsigned long *value) +{ + char *endp; + + while (*buf && isspace(*buf)) + buf++; + *value = simple_strtoul(buf, &endp, 0); + while (*endp && isspace(*endp)) + endp++; + if (*endp || *value > max) + return -EINVAL; + + return 0; +} + +static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); +} + +static ssize_t session_write_kbytes_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + struct super_block *sb = sbi->s_buddy_cache->i_sb; + + return snprintf(buf, PAGE_SIZE, "%lu\n", + (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - + sbi->s_sectors_written_start) >> 1); +} + +static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + struct super_block *sb = sbi->s_buddy_cache->i_sb; + + return snprintf(buf, PAGE_SIZE, "%llu\n", + sbi->s_kbytes_written + + ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - + EXT4_SB(sb)->s_sectors_written_start) >> 1)); +} + +static ssize_t inode_readahead_blks_store(struct ext4_attr *a, + struct ext4_sb_info *sbi, + const char *buf, size_t count) +{ + unsigned long t; + + if (parse_strtoul(buf, 0x40000000, &t)) + return -EINVAL; + + /* inode_readahead_blks must be a power of 2 */ + if (t & (t-1)) + return -EINVAL; + + sbi->s_inode_readahead_blks = t; + return count; +} + +static ssize_t sbi_ui_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); + + return snprintf(buf, PAGE_SIZE, "%u\n", *ui); +} + +static ssize_t sbi_ui_store(struct ext4_attr *a, + struct ext4_sb_info *sbi, + const char *buf, size_t count) +{ + unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); + unsigned long t; + + if (parse_strtoul(buf, 0xffffffff, &t)) + return -EINVAL; + *ui = t; + return count; +} + +#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ +static struct ext4_attr ext4_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .offset = offsetof(struct ext4_sb_info, _elname), \ +} +#define EXT4_ATTR(name, mode, show, store) \ +static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) + +#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) +#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) +#define EXT4_RW_ATTR_SBI_UI(name, elname) \ + EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) +#define ATTR_LIST(name) &ext4_attr_##name.attr + +EXT4_RO_ATTR(delayed_allocation_blocks); +EXT4_RO_ATTR(session_write_kbytes); +EXT4_RO_ATTR(lifetime_write_kbytes); +EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, + inode_readahead_blks_store, s_inode_readahead_blks); +EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); +EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); +EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); +EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); +EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); +EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); + +static struct attribute *ext4_attrs[] = { + ATTR_LIST(delayed_allocation_blocks), + ATTR_LIST(session_write_kbytes), + ATTR_LIST(lifetime_write_kbytes), + ATTR_LIST(inode_readahead_blks), + ATTR_LIST(mb_stats), + ATTR_LIST(mb_max_to_scan), + ATTR_LIST(mb_min_to_scan), + ATTR_LIST(mb_order2_req), + ATTR_LIST(mb_stream_req), + ATTR_LIST(mb_group_prealloc), + NULL, +}; + +static ssize_t ext4_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, + s_kobj); + struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); + + return a->show ? a->show(a, sbi, buf) : 0; +} + +static ssize_t ext4_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, + s_kobj); + struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); + + return a->store ? a->store(a, sbi, buf, len) : 0; +} + +static void ext4_sb_release(struct kobject *kobj) +{ + struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, + s_kobj); + complete(&sbi->s_kobj_unregister); +} + + +static struct sysfs_ops ext4_attr_ops = { + .show = ext4_attr_show, + .store = ext4_attr_store, +}; + +static struct kobj_type ext4_ktype = { + .default_attrs = ext4_attrs, + .sysfs_ops = &ext4_attr_ops, + .release = ext4_sb_release, +}; + static int ext4_fill_super(struct super_block *sb, void *data, int silent) __releases(kernel_lock) __acquires(kernel_lock) @@ -2021,12 +2232,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) return -ENOMEM; + + sbi->s_blockgroup_lock = + kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); + if (!sbi->s_blockgroup_lock) { + kfree(sbi); + return -ENOMEM; + } sb->s_fs_info = sbi; sbi->s_mount_opt = 0; sbi->s_resuid = EXT4_DEF_RESUID; sbi->s_resgid = EXT4_DEF_RESGID; sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; sbi->s_sb_block = sb_block; + sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part, + sectors[1]); unlock_kernel(); @@ -2064,6 +2284,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = le16_to_cpu(es->s_magic); if (sb->s_magic != EXT4_SUPER_MAGIC) goto cantfind_ext4; + sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); /* Set defaults before we parse the mount options */ def_mount_opts = le32_to_cpu(es->s_default_mount_opts); @@ -2101,7 +2322,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; - set_opt(sbi->s_mount_opt, RESERVATION); set_opt(sbi->s_mount_opt, BARRIER); /* @@ -2325,14 +2545,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_PROC_FS if (ext4_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); - - if (sbi->s_proc) - proc_create_data("inode_readahead_blks", 0644, sbi->s_proc, - &ext4_ui_proc_fops, - &sbi->s_inode_readahead_blks); #endif - bgl_lock_init(&sbi->s_blockgroup_lock); + bgl_lock_init(sbi->s_blockgroup_lock); for (i = 0; i < db_count; i++) { block = descriptor_loc(sb, logical_sb_block, i); @@ -2564,6 +2779,16 @@ no_journal: goto failed_mount4; } + sbi->s_kobj.kset = ext4_kset; + init_completion(&sbi->s_kobj_unregister); + err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, + "%s", sb->s_id); + if (err) { + ext4_mb_release(sb); + ext4_ext_release(sb); + goto failed_mount4; + }; + /* * akpm: core read_super() calls in here with the superblock locked. * That deadlocks, because orphan cleanup needs to lock the superblock @@ -2618,7 +2843,6 @@ failed_mount2: kfree(sbi->s_group_desc); failed_mount: if (sbi->s_proc) { - remove_proc_entry("inode_readahead_blks", sbi->s_proc); remove_proc_entry(sb->s_id, ext4_proc_root); } #ifdef CONFIG_QUOTA @@ -2913,6 +3137,10 @@ static int ext4_commit_super(struct super_block *sb, set_buffer_uptodate(sbh); } es->s_wtime = cpu_to_le32(get_seconds()); + es->s_kbytes_written = + cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + + ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - + EXT4_SB(sb)->s_sectors_written_start) >> 1)); ext4_free_blocks_count_set(es, percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeblocks_counter)); es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( @@ -3647,45 +3875,6 @@ static int ext4_get_sb(struct file_system_type *fs_type, return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); } -#ifdef CONFIG_PROC_FS -static int ext4_ui_proc_show(struct seq_file *m, void *v) -{ - unsigned int *p = m->private; - - seq_printf(m, "%u\n", *p); - return 0; -} - -static int ext4_ui_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ext4_ui_proc_show, PDE(inode)->data); -} - -static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf, - size_t cnt, loff_t *ppos) -{ - unsigned long *p = PDE(file->f_path.dentry->d_inode)->data; - char str[32]; - - if (cnt >= sizeof(str)) - return -EINVAL; - if (copy_from_user(str, buf, cnt)) - return -EFAULT; - - *p = simple_strtoul(str, NULL, 0); - return cnt; -} - -const struct file_operations ext4_ui_proc_fops = { - .owner = THIS_MODULE, - .open = ext4_ui_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = ext4_ui_proc_write, -}; -#endif - static struct file_system_type ext4_fs_type = { .owner = THIS_MODULE, .name = "ext4", @@ -3719,6 +3908,9 @@ static int __init init_ext4_fs(void) { int err; + ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); + if (!ext4_kset) + return -ENOMEM; ext4_proc_root = proc_mkdir("fs/ext4", NULL); err = init_ext4_mballoc(); if (err) @@ -3760,6 +3952,7 @@ static void __exit exit_ext4_fs(void) exit_ext4_xattr(); exit_ext4_mballoc(); remove_proc_entry("fs/ext4", NULL); + kset_unregister(ext4_kset); } MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 821d10f719bd..4e340fedf768 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1234,8 +1234,9 @@ static void fuse_vma_close(struct vm_area_struct *vma) * - sync(2) * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER */ -static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; /* * Don't use page->mapping as it may become NULL from a * concurrent truncate. diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 3b9e8de3500b..70b9b8548945 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -337,8 +337,9 @@ static int gfs2_allocate_page_backing(struct page *page) * blocks allocated on disk to back that page. */ -static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -412,6 +413,8 @@ out_unlock: gfs2_glock_dq(&gh); out: gfs2_holder_uninit(&gh); + if (ret) + ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 9b800d97a687..23a3c76711e0 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -943,14 +943,13 @@ static struct vfsmount *hugetlbfs_vfsmount; static int can_do_hugetlb_shm(void) { - return likely(capable(CAP_IPC_LOCK) || - in_group_p(sysctl_hugetlb_shm_group) || - can_do_mlock()); + return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); } struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) { int error = -ENOMEM; + int unlock_shm = 0; struct file *file; struct inode *inode; struct dentry *dentry, *root; @@ -960,11 +959,14 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) if (!hugetlbfs_vfsmount) return ERR_PTR(-ENOENT); - if (!can_do_hugetlb_shm()) - return ERR_PTR(-EPERM); - - if (!user_shm_lock(size, user)) - return ERR_PTR(-ENOMEM); + if (!can_do_hugetlb_shm()) { + if (user_shm_lock(size, user)) { + unlock_shm = 1; + WARN_ONCE(1, + "Using mlock ulimits for SHM_HUGETLB deprecated\n"); + } else + return ERR_PTR(-EPERM); + } root = hugetlbfs_vfsmount->mnt_root; quick_string.name = name; @@ -1004,7 +1006,8 @@ out_inode: out_dentry: dput(dentry); out_shm_unlock: - user_shm_unlock(size, user); + if (unlock_shm) + user_shm_unlock(size, user); return ERR_PTR(error); } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 62804e57a44c..4ea72377c7a2 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -367,6 +367,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int tag_bytes = journal_tag_bytes(journal); struct buffer_head *cbh = NULL; /* For transactional checksums */ __u32 crc32_sum = ~0; + int write_op = WRITE; /* * First job: lock down the current transaction and wait for @@ -401,6 +402,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_LOCKED; + if (commit_transaction->t_synchronous_commit) + write_op = WRITE_SYNC; stats.u.run.rs_wait = commit_transaction->t_max_wait; stats.u.run.rs_locked = jiffies; stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, @@ -680,7 +683,7 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(WRITE, bh); + submit_bh(write_op, bh); } cond_resched(); stats.u.run.rs_blocks_logged += bufs; diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 257ff2625765..bbe6d592d8b3 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -55,6 +55,25 @@ * need do nothing. * RevokeValid set, Revoked set: * buffer has been revoked. + * + * Locking rules: + * We keep two hash tables of revoke records. One hashtable belongs to the + * running transaction (is pointed to by journal->j_revoke), the other one + * belongs to the committing transaction. Accesses to the second hash table + * happen only from the kjournald and no other thread touches this table. Also + * journal_switch_revoke_table() which switches which hashtable belongs to the + * running and which to the committing transaction is called only from + * kjournald. Therefore we need no locks when accessing the hashtable belonging + * to the committing transaction. + * + * All users operating on the hash table belonging to the running transaction + * have a handle to the transaction. Therefore they are safe from kjournald + * switching hash tables under them. For operations on the lists of entries in + * the hash table j_revoke_lock is used. + * + * Finally, also replay code uses the hash tables but at this moment noone else + * can touch them (filesystem isn't mounted yet) and hence no locking is + * needed. */ #ifndef __KERNEL__ @@ -401,8 +420,6 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, * the second time we would still have a pending revoke to cancel. So, * do not trust the Revoked bit on buffers unless RevokeValid is also * set. - * - * The caller must have the journal locked. */ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) { @@ -480,10 +497,7 @@ void jbd2_journal_switch_revoke_table(journal_t *journal) /* * Write revoke records to the journal for all entries in the current * revoke hash, deleting the entries as we go. - * - * Called with the journal lock held. */ - void jbd2_journal_write_revoke_records(journal_t *journal, transaction_t *transaction) { diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 28ce21d8598e..996ffda06bf3 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1315,6 +1315,8 @@ int jbd2_journal_stop(handle_t *handle) } } + if (handle->h_sync) + transaction->t_synchronous_commit = 1; current->journal_info = NULL; spin_lock(&journal->j_state_lock); spin_lock(&transaction->t_handle_lock); diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index aedc47a264c1..1f3b0fc0d351 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -139,55 +139,6 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) return 0; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static const struct in6_addr *nlmclnt_map_v4addr(const struct sockaddr *sap, - struct in6_addr *addr_mapped) -{ - const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; - - switch (sap->sa_family) { - case AF_INET6: - return &((const struct sockaddr_in6 *)sap)->sin6_addr; - case AF_INET: - ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, addr_mapped); - return addr_mapped; - } - - return NULL; -} - -/* - * If lockd is using a PF_INET6 listener, all incoming requests appear - * to come from AF_INET6 remotes. The address of AF_INET remotes are - * mapped to AF_INET6 automatically by the network layer. In case the - * user passed an AF_INET server address at mount time, ensure both - * addresses are AF_INET6 before comparing them. - */ -static int nlmclnt_cmp_addr(const struct nlm_host *host, - const struct sockaddr *sap) -{ - const struct in6_addr *addr1; - const struct in6_addr *addr2; - struct in6_addr addr1_mapped; - struct in6_addr addr2_mapped; - - addr1 = nlmclnt_map_v4addr(nlm_addr(host), &addr1_mapped); - if (likely(addr1 != NULL)) { - addr2 = nlmclnt_map_v4addr(sap, &addr2_mapped); - if (likely(addr2 != NULL)) - return ipv6_addr_equal(addr1, addr2); - } - - return 0; -} -#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ -static int nlmclnt_cmp_addr(const struct nlm_host *host, - const struct sockaddr *sap) -{ - return nlm_cmp_addr(nlm_addr(host), sap); -} -#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ - /* * The server lockd has called us back to tell us the lock was granted */ @@ -215,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) */ if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) continue; - if (!nlmclnt_cmp_addr(block->b_host, addr)) + if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) continue; if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) continue; diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 5e2c4d5ac827..6d5d4a4169e5 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -16,6 +16,8 @@ #include <linux/sunrpc/svc.h> #include <linux/lockd/lockd.h> +#include <asm/unaligned.h> + #define NLMDBG_FACILITY NLMDBG_MONITOR #define NSM_PROGRAM 100024 #define NSM_VERSION 1 @@ -274,10 +276,12 @@ static void nsm_init_private(struct nsm_handle *nsm) { u64 *p = (u64 *)&nsm->sm_priv.data; struct timespec ts; + s64 ns; ktime_get_ts(&ts); - *p++ = timespec_to_ns(&ts); - *p = (unsigned long)nsm; + ns = timespec_to_ns(&ts); + put_unaligned(ns, p); + put_unaligned((unsigned long)nsm, p + 1); } static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 64f1c31b5853..abf83881f68a 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -53,17 +53,6 @@ static struct svc_rqst *nlmsvc_rqst; unsigned long nlmsvc_timeout; /* - * If the kernel has IPv6 support available, always listen for - * both AF_INET and AF_INET6 requests. - */ -#if (defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) && \ - defined(CONFIG_SUNRPC_REGISTER_V4) -static const sa_family_t nlmsvc_family = AF_INET6; -#else /* (CONFIG_IPV6 || CONFIG_IPV6_MODULE) && CONFIG_SUNRPC_REGISTER_V4 */ -static const sa_family_t nlmsvc_family = AF_INET; -#endif /* (CONFIG_IPV6 || CONFIG_IPV6_MODULE) && CONFIG_SUNRPC_REGISTER_V4 */ - -/* * These can be set at insmod time (useful for NFS as root filesystem), * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 */ @@ -204,19 +193,30 @@ lockd(void *vrqstp) return 0; } -static int create_lockd_listener(struct svc_serv *serv, char *name, - unsigned short port) +static int create_lockd_listener(struct svc_serv *serv, const char *name, + const int family, const unsigned short port) { struct svc_xprt *xprt; - xprt = svc_find_xprt(serv, name, 0, 0); + xprt = svc_find_xprt(serv, name, family, 0); if (xprt == NULL) - return svc_create_xprt(serv, name, port, SVC_SOCK_DEFAULTS); - + return svc_create_xprt(serv, name, family, port, + SVC_SOCK_DEFAULTS); svc_xprt_put(xprt); return 0; } +static int create_lockd_family(struct svc_serv *serv, const int family) +{ + int err; + + err = create_lockd_listener(serv, "udp", family, nlm_udpport); + if (err < 0) + return err; + + return create_lockd_listener(serv, "tcp", family, nlm_tcpport); +} + /* * Ensure there are active UDP and TCP listeners for lockd. * @@ -232,13 +232,15 @@ static int make_socks(struct svc_serv *serv) static int warned; int err; - err = create_lockd_listener(serv, "udp", nlm_udpport); + err = create_lockd_family(serv, PF_INET); if (err < 0) goto out_err; - err = create_lockd_listener(serv, "tcp", nlm_tcpport); - if (err < 0) +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + err = create_lockd_family(serv, PF_INET6); + if (err < 0 && err != -EAFNOSUPPORT) goto out_err; +#endif /* CONFIG_IPV6 || CONFIG_IPV6_MODULE */ warned = 0; return 0; @@ -274,7 +276,7 @@ int lockd_up(void) "lockd_up: no pid, %d users??\n", nlmsvc_users); error = -ENOMEM; - serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, nlmsvc_family, NULL); + serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); goto out; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 3e634f2a1083..a886e692ddd0 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -38,19 +38,10 @@ static struct svc_program nfs4_callback_program; unsigned int nfs_callback_set_tcpport; unsigned short nfs_callback_tcpport; +unsigned short nfs_callback_tcpport6; static const int nfs_set_port_min = 0; static const int nfs_set_port_max = 65535; -/* - * If the kernel has IPv6 support available, always listen for - * both AF_INET and AF_INET6 requests. - */ -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static const sa_family_t nfs_callback_family = AF_INET6; -#else -static const sa_family_t nfs_callback_family = AF_INET; -#endif - static int param_set_port(const char *val, struct kernel_param *kp) { char *endp; @@ -116,19 +107,29 @@ int nfs_callback_up(void) mutex_lock(&nfs_callback_mutex); if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) goto out; - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, - nfs_callback_family, NULL); + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); ret = -ENOMEM; if (!serv) goto out_err; - ret = svc_create_xprt(serv, "tcp", nfs_callback_set_tcpport, - SVC_SOCK_ANONYMOUS); + ret = svc_create_xprt(serv, "tcp", PF_INET, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret <= 0) goto out_err; nfs_callback_tcpport = ret; dprintk("NFS: Callback listener port = %u (af %u)\n", - nfs_callback_tcpport, nfs_callback_family); + nfs_callback_tcpport, PF_INET); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + ret = svc_create_xprt(serv, "tcp", PF_INET6, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); + if (ret > 0) { + nfs_callback_tcpport6 = ret; + dprintk("NFS: Callback listener port = %u (af %u)\n", + nfs_callback_tcpport6, PF_INET6); + } else if (ret != -EAFNOSUPPORT) + goto out_err; +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); if (IS_ERR(nfs_callback_info.rqst)) { diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index bb25d2135ff1..e110e286a262 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -72,5 +72,6 @@ extern void nfs_callback_down(void); extern unsigned int nfs_callback_set_tcpport; extern unsigned short nfs_callback_tcpport; +extern unsigned short nfs_callback_tcpport6; #endif /* __LINUX_FS_NFS_CALLBACK_H */ diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 2277421656e7..aba38017bdef 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -224,38 +224,6 @@ void nfs_put_client(struct nfs_client *clp) } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static const struct in6_addr *nfs_map_ipv4_addr(const struct sockaddr *sa, struct in6_addr *addr_mapped) -{ - switch (sa->sa_family) { - default: - return NULL; - case AF_INET6: - return &((const struct sockaddr_in6 *)sa)->sin6_addr; - break; - case AF_INET: - ipv6_addr_set_v4mapped(((const struct sockaddr_in *)sa)->sin_addr.s_addr, - addr_mapped); - return addr_mapped; - } -} - -static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, - const struct sockaddr *sa2) -{ - const struct in6_addr *addr1; - const struct in6_addr *addr2; - struct in6_addr addr1_mapped; - struct in6_addr addr2_mapped; - - addr1 = nfs_map_ipv4_addr(sa1, &addr1_mapped); - if (likely(addr1 != NULL)) { - addr2 = nfs_map_ipv4_addr(sa2, &addr2_mapped); - if (likely(addr2 != NULL)) - return ipv6_addr_equal(addr1, addr2); - } - return 0; -} - /* * Test if two ip6 socket addresses refer to the same socket by * comparing relevant fields. The padding bytes specifically, are not @@ -267,38 +235,21 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, * * The caller should ensure both socket addresses are AF_INET6. */ -static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1, - const struct sockaddr *sa2) +static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, + const struct sockaddr *sa2) { - const struct sockaddr_in6 *saddr1 = (const struct sockaddr_in6 *)sa1; - const struct sockaddr_in6 *saddr2 = (const struct sockaddr_in6 *)sa2; + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2; - if (!ipv6_addr_equal(&saddr1->sin6_addr, - &saddr1->sin6_addr)) + if (ipv6_addr_scope(&sin1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL && + sin1->sin6_scope_id != sin2->sin6_scope_id) return 0; - if (ipv6_addr_scope(&saddr1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL && - saddr1->sin6_scope_id != saddr2->sin6_scope_id) - return 0; - return saddr1->sin6_port == saddr2->sin6_port; -} -#else -static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1, - const struct sockaddr_in *sa2) -{ - return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr; -} -static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, - const struct sockaddr *sa2) -{ - if (unlikely(sa1->sa_family != AF_INET || sa2->sa_family != AF_INET)) - return 0; - return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1, - (const struct sockaddr_in *)sa2); + return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr); } - -static int nfs_sockaddr_cmp_ip6(const struct sockaddr * sa1, - const struct sockaddr * sa2) +#else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */ +static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, + const struct sockaddr *sa2) { return 0; } @@ -311,20 +262,57 @@ static int nfs_sockaddr_cmp_ip6(const struct sockaddr * sa1, * * The caller should ensure both socket addresses are AF_INET. */ +static int nfs_sockaddr_match_ipaddr4(const struct sockaddr *sa1, + const struct sockaddr *sa2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2; + + return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; +} + +static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1, + const struct sockaddr *sa2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2; + + return nfs_sockaddr_match_ipaddr6(sa1, sa2) && + (sin1->sin6_port == sin2->sin6_port); +} + static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1, const struct sockaddr *sa2) { - const struct sockaddr_in *saddr1 = (const struct sockaddr_in *)sa1; - const struct sockaddr_in *saddr2 = (const struct sockaddr_in *)sa2; + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2; - if (saddr1->sin_addr.s_addr != saddr2->sin_addr.s_addr) + return nfs_sockaddr_match_ipaddr4(sa1, sa2) && + (sin1->sin_port == sin2->sin_port); +} + +/* + * Test if two socket addresses represent the same actual socket, + * by comparing (only) relevant fields, excluding the port number. + */ +static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, + const struct sockaddr *sa2) +{ + if (sa1->sa_family != sa2->sa_family) return 0; - return saddr1->sin_port == saddr2->sin_port; + + switch (sa1->sa_family) { + case AF_INET: + return nfs_sockaddr_match_ipaddr4(sa1, sa2); + case AF_INET6: + return nfs_sockaddr_match_ipaddr6(sa1, sa2); + } + return 0; } /* * Test if two socket addresses represent the same actual socket, - * by comparing (only) relevant fields. + * by comparing (only) relevant fields, including the port number. */ static int nfs_sockaddr_cmp(const struct sockaddr *sa1, const struct sockaddr *sa2) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 78bf72fc1db3..370b190a09d1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1624,8 +1624,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, } else if (atomic_read(&new_dentry->d_count) > 1) /* dentry still busy? */ goto out; - } else - nfs_drop_nlink(new_inode); + } go_ahead: /* @@ -1638,10 +1637,8 @@ go_ahead: } nfs_inode_return_delegation(old_inode); - if (new_inode != NULL) { + if (new_inode != NULL) nfs_inode_return_delegation(new_inode); - d_delete(new_dentry); - } error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, new_dir, &new_dentry->d_name); @@ -1650,6 +1647,8 @@ out: if (rehash) d_rehash(rehash); if (!error) { + if (new_inode != NULL) + nfs_drop_nlink(new_inode); d_move(old_dentry, new_dentry); nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 90f292b520d2..0abf3f331f56 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -64,11 +64,7 @@ const struct file_operations nfs_file_operations = { .write = do_sync_write, .aio_read = nfs_file_read, .aio_write = nfs_file_write, -#ifdef CONFIG_MMU .mmap = nfs_file_mmap, -#else - .mmap = generic_file_mmap, -#endif .open = nfs_file_open, .flush = nfs_file_flush, .release = nfs_file_release, @@ -141,9 +137,6 @@ nfs_file_release(struct inode *inode, struct file *filp) dentry->d_parent->d_name.name, dentry->d_name.name); - /* Ensure that dirty pages are flushed out with the right creds */ - if (filp->f_mode & FMODE_WRITE) - nfs_wb_all(dentry->d_inode); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return nfs_release(inode, filp); } @@ -235,7 +228,6 @@ nfs_file_flush(struct file *file, fl_owner_t id) struct nfs_open_context *ctx = nfs_file_open_context(file); struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - int status; dprintk("NFS: flush(%s/%s)\n", dentry->d_parent->d_name.name, @@ -245,11 +237,8 @@ nfs_file_flush(struct file *file, fl_owner_t id) return 0; nfs_inc_stats(inode, NFSIOS_VFSFLUSH); - /* Ensure that data+attribute caches are up to date after close() */ - status = nfs_do_fsync(ctx, inode); - if (!status) - nfs_revalidate_inode(NFS_SERVER(inode), inode); - return status; + /* Flush writes to the server and return any errors */ + return nfs_do_fsync(ctx, inode); } static ssize_t @@ -304,11 +293,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) dprintk("NFS: mmap(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); - status = nfs_revalidate_mapping(inode, file->f_mapping); + /* Note: generic_file_mmap() returns ENOSYS on nommu systems + * so we call that before revalidating the mapping + */ + status = generic_file_mmap(file, vma); if (!status) { vma->vm_ops = &nfs_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; - file_accessed(file); + status = nfs_revalidate_mapping(inode, file->f_mapping); } return status; } @@ -354,6 +345,15 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, file->f_path.dentry->d_name.name, mapping->host->i_ino, len, (long long) pos); + /* + * Prevent starvation issues if someone is doing a consistency + * sync-to-disk + */ + ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, + nfs_wait_bit_killable, TASK_KILLABLE); + if (ret) + return ret; + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; @@ -451,8 +451,9 @@ const struct address_space_operations nfs_file_aops = { .launder_page = nfs_launder_page, }; -static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct file *filp = vma->vm_file; struct dentry *dentry = filp->f_path.dentry; unsigned pagelen; @@ -483,6 +484,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) ret = pagelen; out_unlock: unlock_page(page); + if (ret) + ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index b7c9b2df1f29..46177cb87064 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -156,7 +156,7 @@ int nfs4_path_walk(struct nfs_server *server, return ret; } - if (fattr.type != NFDIR) { + if (!S_ISDIR(fattr.mode)) { printk(KERN_ERR "nfs4_get_root:" " getroot encountered non-directory\n"); return -ENOTDIR; @@ -213,7 +213,7 @@ eat_dot_dir: return ret; } - if (fattr.type != NFDIR) { + if (!S_ISDIR(fattr.mode)) { printk(KERN_ERR "nfs4_get_root:" " lookupfh encountered non-directory\n"); return -ENOTDIR; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0c381686171e..a834d1d850b7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -66,6 +66,18 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) } /** + * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks + * @word: long word containing the bit lock + */ +int nfs_wait_bit_killable(void *word) +{ + if (fatal_signal_pending(current)) + return -ERESTARTSYS; + schedule(); + return 0; +} + +/** * nfs_compat_user_ino64 - returns the user-visible inode number * @fileid: 64-bit fileid * @@ -249,13 +261,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) struct inode *inode = ERR_PTR(-ENOENT); unsigned long hash; - if ((fattr->valid & NFS_ATTR_FATTR) == 0) + if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) goto out_no_inode; - - if (!fattr->nlink) { - printk("NFS: Buggy server - nlink == 0!\n"); + if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) goto out_no_inode; - } hash = nfs_fattr_to_ino_t(fattr); @@ -291,7 +300,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) && fattr->size <= NFS_LIMIT_READDIRPLUS) set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); /* Deal with crossing mountpoints */ - if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { + if ((fattr->valid & NFS_ATTR_FATTR_FSID) + && !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) inode->i_op = &nfs_referral_inode_operations; else @@ -304,28 +314,45 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) else init_special_inode(inode, inode->i_mode, fattr->rdev); + memset(&inode->i_atime, 0, sizeof(inode->i_atime)); + memset(&inode->i_mtime, 0, sizeof(inode->i_mtime)); + memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); + nfsi->change_attr = 0; + inode->i_size = 0; + inode->i_nlink = 0; + inode->i_uid = -2; + inode->i_gid = -2; + inode->i_blocks = 0; + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + nfsi->read_cache_jiffies = fattr->time_start; nfsi->attr_gencount = fattr->gencount; - inode->i_atime = fattr->atime; - inode->i_mtime = fattr->mtime; - inode->i_ctime = fattr->ctime; - if (fattr->valid & NFS_ATTR_FATTR_V4) + if (fattr->valid & NFS_ATTR_FATTR_ATIME) + inode->i_atime = fattr->atime; + if (fattr->valid & NFS_ATTR_FATTR_MTIME) + inode->i_mtime = fattr->mtime; + if (fattr->valid & NFS_ATTR_FATTR_CTIME) + inode->i_ctime = fattr->ctime; + if (fattr->valid & NFS_ATTR_FATTR_CHANGE) nfsi->change_attr = fattr->change_attr; - inode->i_size = nfs_size_to_loff_t(fattr->size); - inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; - if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { + if (fattr->valid & NFS_ATTR_FATTR_SIZE) + inode->i_size = nfs_size_to_loff_t(fattr->size); + if (fattr->valid & NFS_ATTR_FATTR_NLINK) + inode->i_nlink = fattr->nlink; + if (fattr->valid & NFS_ATTR_FATTR_OWNER) + inode->i_uid = fattr->uid; + if (fattr->valid & NFS_ATTR_FATTR_GROUP) + inode->i_gid = fattr->gid; + if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) + inode->i_blocks = fattr->du.nfs2.blocks; + if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - } else { - inode->i_blocks = fattr->du.nfs2.blocks; } nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; - memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); nfsi->access_cache = RB_ROOT; unlock_new_inode(inode); @@ -514,6 +541,32 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) return err; } +/** + * nfs_close_context - Common close_context() routine NFSv2/v3 + * @ctx: pointer to context + * @is_sync: is this a synchronous close + * + * always ensure that the attributes are up to date if we're mounted + * with close-to-open semantics + */ +void nfs_close_context(struct nfs_open_context *ctx, int is_sync) +{ + struct inode *inode; + struct nfs_server *server; + + if (!(ctx->mode & FMODE_WRITE)) + return; + if (!is_sync) + return; + inode = ctx->path.dentry->d_inode; + if (!list_empty(&NFS_I(inode)->open_files)) + return; + server = NFS_SERVER(inode); + if (server->flags & NFS_MOUNT_NOCTO) + return; + nfs_revalidate_inode(server, inode); +} + static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred) { struct nfs_open_context *ctx; @@ -540,24 +593,15 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) return ctx; } -static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait) +static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) { - struct inode *inode; - - if (ctx == NULL) - return; + struct inode *inode = ctx->path.dentry->d_inode; - inode = ctx->path.dentry->d_inode; if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) return; list_del(&ctx->list); spin_unlock(&inode->i_lock); - if (ctx->state != NULL) { - if (wait) - nfs4_close_sync(&ctx->path, ctx->state, ctx->mode); - else - nfs4_close_state(&ctx->path, ctx->state, ctx->mode); - } + NFS_PROTO(inode)->close_context(ctx, is_sync); if (ctx->cred != NULL) put_rpccred(ctx->cred); path_put(&ctx->path); @@ -670,9 +714,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (NFS_STALE(inode)) goto out; - if (NFS_STALE(inode)) - goto out; - nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); if (status != 0) { @@ -815,25 +856,31 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); - if ((fattr->valid & NFS_ATTR_WCC_V4) != 0 && - nfsi->change_attr == fattr->pre_change_attr) { + if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) + && (fattr->valid & NFS_ATTR_FATTR_CHANGE) + && nfsi->change_attr == fattr->pre_change_attr) { nfsi->change_attr = fattr->change_attr; if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; } /* If we have atomic WCC data, we may update some attributes */ - if ((fattr->valid & NFS_ATTR_WCC) != 0) { - if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) + if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) + && (fattr->valid & NFS_ATTR_FATTR_CTIME) + && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { + + if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) + && (fattr->valid & NFS_ATTR_FATTR_MTIME) + && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; - } - if (i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) && - nfsi->npages == 0) - i_size_write(inode, nfs_size_to_loff_t(fattr->size)); } + if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) + && (fattr->valid & NFS_ATTR_FATTR_SIZE) + && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) + && nfsi->npages == 0) + i_size_write(inode, nfs_size_to_loff_t(fattr->size)); } /** @@ -853,35 +900,39 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat /* Has the inode gone and changed behind our back? */ - if (nfsi->fileid != fattr->fileid - || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) + return -EIO; + if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) return -EIO; - } - if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && nfsi->change_attr != fattr->change_attr) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; /* Verify a few of the more important attributes */ - if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) + if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; - cur_size = i_size_read(inode); - new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize && nfsi->npages == 0) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + if (fattr->valid & NFS_ATTR_FATTR_SIZE) { + cur_size = i_size_read(inode); + new_isize = nfs_size_to_loff_t(fattr->size); + if (cur_size != new_isize && nfsi->npages == 0) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + } /* Have any file permissions changed? */ - if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) - || inode->i_uid != fattr->uid - || inode->i_gid != fattr->gid) + if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid) + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid) invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ - if (inode->i_nlink != fattr->nlink) + if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) invalid |= NFS_INO_INVALID_ATTR; - if (!timespec_equal(&inode->i_atime, &fattr->atime)) + if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&inode->i_atime, &fattr->atime)) invalid |= NFS_INO_INVALID_ATIME; if (invalid != 0) @@ -893,11 +944,15 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr) { + if (!(fattr->valid & NFS_ATTR_FATTR_CTIME)) + return 0; return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; } static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr) { + if (!(fattr->valid & NFS_ATTR_FATTR_SIZE)) + return 0; return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); } @@ -1033,20 +1088,31 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa /* Don't do a WCC update if these attributes are already stale */ if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !nfs_inode_attrs_need_update(inode, fattr)) { - fattr->valid &= ~(NFS_ATTR_WCC_V4|NFS_ATTR_WCC); + fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE + | NFS_ATTR_FATTR_PRESIZE + | NFS_ATTR_FATTR_PREMTIME + | NFS_ATTR_FATTR_PRECTIME); goto out_noforce; } - if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && - (fattr->valid & NFS_ATTR_WCC_V4) == 0) { + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && + (fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) { fattr->pre_change_attr = NFS_I(inode)->change_attr; - fattr->valid |= NFS_ATTR_WCC_V4; + fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; } - if ((fattr->valid & NFS_ATTR_FATTR) != 0 && - (fattr->valid & NFS_ATTR_WCC) == 0) { + if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && + (fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) { memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); + fattr->valid |= NFS_ATTR_FATTR_PRECTIME; + } + if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 && + (fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) { memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); + fattr->valid |= NFS_ATTR_FATTR_PREMTIME; + } + if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 && + (fattr->valid & NFS_ATTR_FATTR_PRESIZE) == 0) { fattr->pre_size = i_size_read(inode); - fattr->valid |= NFS_ATTR_WCC; + fattr->valid |= NFS_ATTR_FATTR_PRESIZE; } out_noforce: status = nfs_post_op_update_inode_locked(inode, fattr); @@ -1078,18 +1144,18 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) __func__, inode->i_sb->s_id, inode->i_ino, atomic_read(&inode->i_count), fattr->valid); - if (nfsi->fileid != fattr->fileid) + if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) goto out_fileid; /* * Make sure the inode's type hasn't changed. */ - if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) goto out_changed; server = NFS_SERVER(inode); /* Update the fsid? */ - if (S_ISDIR(inode->i_mode) && + if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) && !nfs_fsid_equal(&server->fsid, &fattr->fsid) && !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags)) server->fsid = fattr->fsid; @@ -1099,14 +1165,27 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ nfsi->read_cache_jiffies = fattr->time_start; - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME - | NFS_INO_REVAL_PAGECACHE); + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME))) + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ATIME + | NFS_INO_REVAL_PAGECACHE); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); /* More cache consistency checks */ - if (!(fattr->valid & NFS_ATTR_FATTR_V4)) { + if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { + if (nfsi->change_attr != fattr->change_attr) { + dprintk("NFS: change_attr change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + if (S_ISDIR(inode->i_mode)) + nfs_force_lookup_revalidate(inode); + nfsi->change_attr = fattr->change_attr; + } + } + + if (fattr->valid & NFS_ATTR_FATTR_MTIME) { /* NFSv2/v3: Check if the mtime agrees */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { dprintk("NFS: mtime change on server for file %s/%ld\n", @@ -1114,59 +1193,80 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); } + } + if (fattr->valid & NFS_ATTR_FATTR_CTIME) { /* If ctime has changed we should definitely clear access+acl caches */ - if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) + if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - } else if (nfsi->change_attr != fattr->change_attr) { - dprintk("NFS: change_attr change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - if (S_ISDIR(inode->i_mode)) - nfs_force_lookup_revalidate(inode); + /* and probably clear data for a directory too as utimes can cause + * havoc with our cache. + */ + if (S_ISDIR(inode->i_mode)) { + invalid |= NFS_INO_INVALID_DATA; + nfs_force_lookup_revalidate(inode); + } + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + } } /* Check if our cached file size is stale */ - new_isize = nfs_size_to_loff_t(fattr->size); - cur_isize = i_size_read(inode); - if (new_isize != cur_isize) { - /* Do we perhaps have any outstanding writes, or has - * the file grown beyond our last write? */ - if (nfsi->npages == 0 || new_isize > cur_isize) { - i_size_write(inode, new_isize); - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + if (fattr->valid & NFS_ATTR_FATTR_SIZE) { + new_isize = nfs_size_to_loff_t(fattr->size); + cur_isize = i_size_read(inode); + if (new_isize != cur_isize) { + /* Do we perhaps have any outstanding writes, or has + * the file grown beyond our last write? */ + if (nfsi->npages == 0 || new_isize > cur_isize) { + i_size_write(inode, new_isize); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } + dprintk("NFS: isize change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); } - dprintk("NFS: isize change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); } - memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); - nfsi->change_attr = fattr->change_attr; - - if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || - inode->i_uid != fattr->uid || - inode->i_gid != fattr->gid) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + if (fattr->valid & NFS_ATTR_FATTR_ATIME) + memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); - if (inode->i_nlink != fattr->nlink) - invalid |= NFS_INO_INVALID_ATTR; + if (fattr->valid & NFS_ATTR_FATTR_MODE) { + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + inode->i_mode = fattr->mode; + } + } + if (fattr->valid & NFS_ATTR_FATTR_OWNER) { + if (inode->i_uid != fattr->uid) { + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + inode->i_uid = fattr->uid; + } + } + if (fattr->valid & NFS_ATTR_FATTR_GROUP) { + if (inode->i_gid != fattr->gid) { + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + inode->i_gid = fattr->gid; + } + } - inode->i_mode = fattr->mode; - inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; + if (fattr->valid & NFS_ATTR_FATTR_NLINK) { + if (inode->i_nlink != fattr->nlink) { + invalid |= NFS_INO_INVALID_ATTR; + if (S_ISDIR(inode->i_mode)) + invalid |= NFS_INO_INVALID_DATA; + inode->i_nlink = fattr->nlink; + } + } - if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { + if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - } else { - inode->i_blocks = fattr->du.nfs2.blocks; } + if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) + inode->i_blocks = fattr->du.nfs2.blocks; /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { @@ -1274,7 +1374,6 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); - nfsi->ncommit = 0; nfsi->npages = 0; atomic_set(&nfsi->silly_count, 1); INIT_HLIST_HEAD(&nfsi->silly_list); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 340ede8f608f..2041f68ff1cc 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -152,6 +152,9 @@ extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; #endif +/* proc.c */ +void nfs_close_context(struct nfs_open_context *ctx, int is_sync); + /* dir.c */ extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); @@ -165,6 +168,7 @@ extern void nfs_clear_inode(struct inode *); extern void nfs4_clear_inode(struct inode *); #endif void nfs_zap_acl_cache(struct inode *inode); +extern int nfs_wait_bit_killable(void *word); /* super.c */ void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 28bab67d1519..c862c9340f9a 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -120,8 +120,8 @@ xdr_decode_time(__be32 *p, struct timespec *timep) static __be32 * xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) { - u32 rdev; - fattr->type = (enum nfs_ftype) ntohl(*p++); + u32 rdev, type; + type = ntohl(*p++); fattr->mode = ntohl(*p++); fattr->nlink = ntohl(*p++); fattr->uid = ntohl(*p++); @@ -136,10 +136,9 @@ xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) p = xdr_decode_time(p, &fattr->atime); p = xdr_decode_time(p, &fattr->mtime); p = xdr_decode_time(p, &fattr->ctime); - fattr->valid |= NFS_ATTR_FATTR; + fattr->valid |= NFS_ATTR_FATTR_V2; fattr->rdev = new_decode_dev(rdev); - if (fattr->type == NFCHR && rdev == NFS2_FIFO_DEV) { - fattr->type = NFFIFO; + if (type == NFCHR && rdev == NFS2_FIFO_DEV) { fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO; fattr->rdev = 0; } diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index c55be7a7679e..b82fe6847f14 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -834,4 +834,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .commit_done = nfs3_commit_done, .lock = nfs3_proc_lock, .clear_acl_cache = nfs3_forget_cached_acls, + .close_context = nfs_close_context, }; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 6cdeacffde46..e6a1932c7110 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -91,19 +91,15 @@ /* * Map file type to S_IFMT bits */ -static struct { - unsigned int mode; - unsigned int nfs2type; -} nfs_type2fmt[] = { - { 0, NFNON }, - { S_IFREG, NFREG }, - { S_IFDIR, NFDIR }, - { S_IFBLK, NFBLK }, - { S_IFCHR, NFCHR }, - { S_IFLNK, NFLNK }, - { S_IFSOCK, NFSOCK }, - { S_IFIFO, NFFIFO }, - { 0, NFBAD } +static const umode_t nfs_type2fmt[] = { + [NF3BAD] = 0, + [NF3REG] = S_IFREG, + [NF3DIR] = S_IFDIR, + [NF3BLK] = S_IFBLK, + [NF3CHR] = S_IFCHR, + [NF3LNK] = S_IFLNK, + [NF3SOCK] = S_IFSOCK, + [NF3FIFO] = S_IFIFO, }; /* @@ -148,13 +144,12 @@ static __be32 * xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) { unsigned int type, major, minor; - int fmode; + umode_t fmode; type = ntohl(*p++); - if (type >= NF3BAD) - type = NF3BAD; - fmode = nfs_type2fmt[type].mode; - fattr->type = nfs_type2fmt[type].nfs2type; + if (type > NF3FIFO) + type = NF3NON; + fmode = nfs_type2fmt[type]; fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode; fattr->nlink = ntohl(*p++); fattr->uid = ntohl(*p++); @@ -177,7 +172,7 @@ xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) p = xdr_decode_time3(p, &fattr->ctime); /* Update the mode bits */ - fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3); + fattr->valid |= NFS_ATTR_FATTR_V3; return p; } @@ -233,7 +228,9 @@ xdr_decode_wcc_attr(__be32 *p, struct nfs_fattr *fattr) p = xdr_decode_hyper(p, &fattr->pre_size); p = xdr_decode_time3(p, &fattr->pre_mtime); p = xdr_decode_time3(p, &fattr->pre_ctime); - fattr->valid |= NFS_ATTR_WCC; + fattr->valid |= NFS_ATTR_FATTR_PRESIZE + | NFS_ATTR_FATTR_PREMTIME + | NFS_ATTR_FATTR_PRECTIME; return p; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8dde84b988d9..97bacccff579 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -193,14 +193,6 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent kunmap_atomic(start, KM_USER0); } -static int nfs4_wait_bit_killable(void *word) -{ - if (fatal_signal_pending(current)) - return -ERESTARTSYS; - schedule(); - return 0; -} - static int nfs4_wait_clnt_recover(struct nfs_client *clp) { int res; @@ -208,7 +200,7 @@ static int nfs4_wait_clnt_recover(struct nfs_client *clp) might_sleep(); res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, - nfs4_wait_bit_killable, TASK_KILLABLE); + nfs_wait_bit_killable, TASK_KILLABLE); return res; } @@ -1439,7 +1431,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) if (calldata->arg.seqid == NULL) goto out_free_calldata; calldata->arg.fmode = 0; - calldata->arg.bitmask = server->attr_bitmask; + calldata->arg.bitmask = server->cache_consistency_bitmask; calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; @@ -1580,6 +1572,15 @@ out_drop: return 0; } +void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) +{ + if (ctx->state == NULL) + return; + if (is_sync) + nfs4_close_sync(&ctx->path, ctx->state, ctx->mode); + else + nfs4_close_state(&ctx->path, ctx->state, ctx->mode); +} static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { @@ -1600,6 +1601,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->caps |= NFS_CAP_HARDLINKS; if (res.has_symlinks != 0) server->caps |= NFS_CAP_SYMLINKS; + memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); + server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; + server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; server->acl_bitmask = res.acl_bitmask; } return status; @@ -2079,7 +2083,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) struct nfs_removeargs *args = msg->rpc_argp; struct nfs_removeres *res = msg->rpc_resp; - args->bitmask = server->attr_bitmask; + args->bitmask = server->cache_consistency_bitmask; res->server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; } @@ -2323,7 +2327,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, .pages = &page, .pgbase = 0, .count = count, - .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, + .bitmask = NFS_SERVER(dentry->d_inode)->cache_consistency_bitmask, }; struct nfs4_readdir_res res; struct rpc_message msg = { @@ -2552,7 +2556,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag { struct nfs_server *server = NFS_SERVER(data->inode); - data->args.bitmask = server->attr_bitmask; + data->args.bitmask = server->cache_consistency_bitmask; data->res.server = server; data->timestamp = jiffies; @@ -2575,7 +2579,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa { struct nfs_server *server = NFS_SERVER(data->inode); - data->args.bitmask = server->attr_bitmask; + data->args.bitmask = server->cache_consistency_bitmask; data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; } @@ -3678,6 +3682,19 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) return len; } +static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr) +{ + if (!((fattr->valid & NFS_ATTR_FATTR_FILEID) && + (fattr->valid & NFS_ATTR_FATTR_FSID) && + (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) + return; + + fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | + NFS_ATTR_FATTR_NLINK; + fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO; + fattr->nlink = 2; +} + int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, struct nfs4_fs_locations *fs_locations, struct page *page) { @@ -3704,6 +3721,7 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, fs_locations->server = server; fs_locations->nlocations = 0; status = rpc_call_sync(server->client, &msg, 0); + nfs_fixup_referral_attributes(&fs_locations->fattr); dprintk("%s: returned status = %d\n", __func__, status); return status; } @@ -3767,6 +3785,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .commit_done = nfs4_commit_done, .lock = nfs4_proc_lock, .clear_acl_cache = nfs4_zap_acl_attr, + .close_context = nfs4_close_context, }; /* diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2022fe47966f..0298e909559f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -62,8 +62,14 @@ static LIST_HEAD(nfs4_clientid_list); static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) { - int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, - nfs_callback_tcpport, cred); + unsigned short port; + int status; + + port = nfs_callback_tcpport; + if (clp->cl_addr.ss_family == AF_INET6) + port = nfs_callback_tcpport6; + + status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred); if (status == 0) status = nfs4_proc_setclientid_confirm(clp, cred); if (status == 0) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index d1e4c8f8a0a9..1690f0e44b91 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -522,20 +522,17 @@ static int nfs4_stat_to_errno(int); decode_lookup_maxsz + \ decode_fs_locations_maxsz) -static struct { - unsigned int mode; - unsigned int nfs2type; -} nfs_type2fmt[] = { - { 0, NFNON }, - { S_IFREG, NFREG }, - { S_IFDIR, NFDIR }, - { S_IFBLK, NFBLK }, - { S_IFCHR, NFCHR }, - { S_IFLNK, NFLNK }, - { S_IFSOCK, NFSOCK }, - { S_IFIFO, NFFIFO }, - { 0, NFNON }, - { 0, NFNON }, +static const umode_t nfs_type2fmt[] = { + [NF4BAD] = 0, + [NF4REG] = S_IFREG, + [NF4DIR] = S_IFDIR, + [NF4BLK] = S_IFBLK, + [NF4CHR] = S_IFCHR, + [NF4LNK] = S_IFLNK, + [NF4SOCK] = S_IFSOCK, + [NF4FIFO] = S_IFIFO, + [NF4ATTRDIR] = 0, + [NF4NAMEDATTR] = 0, }; struct compound_hdr { @@ -2160,6 +2157,7 @@ static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint3 static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *type) { __be32 *p; + int ret = 0; *type = 0; if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U))) @@ -2172,14 +2170,16 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t * return -EIO; } bitmap[0] &= ~FATTR4_WORD0_TYPE; + ret = NFS_ATTR_FATTR_TYPE; } - dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type].nfs2type); - return 0; + dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]); + return ret; } static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) { __be32 *p; + int ret = 0; *change = 0; if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U))) @@ -2188,15 +2188,17 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t READ_BUF(8); READ64(*change); bitmap[0] &= ~FATTR4_WORD0_CHANGE; + ret = NFS_ATTR_FATTR_CHANGE; } dprintk("%s: change attribute=%Lu\n", __func__, (unsigned long long)*change); - return 0; + return ret; } static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size) { __be32 *p; + int ret = 0; *size = 0; if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U))) @@ -2205,9 +2207,10 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t * READ_BUF(8); READ64(*size); bitmap[0] &= ~FATTR4_WORD0_SIZE; + ret = NFS_ATTR_FATTR_SIZE; } dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size); - return 0; + return ret; } static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) @@ -2245,6 +2248,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) { __be32 *p; + int ret = 0; fsid->major = 0; fsid->minor = 0; @@ -2255,11 +2259,12 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs READ64(fsid->major); READ64(fsid->minor); bitmap[0] &= ~FATTR4_WORD0_FSID; + ret = NFS_ATTR_FATTR_FSID; } dprintk("%s: fsid=(0x%Lx/0x%Lx)\n", __func__, (unsigned long long)fsid->major, (unsigned long long)fsid->minor); - return 0; + return ret; } static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) @@ -2297,6 +2302,7 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) { __be32 *p; + int ret = 0; *fileid = 0; if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U))) @@ -2305,14 +2311,16 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t READ_BUF(8); READ64(*fileid); bitmap[0] &= ~FATTR4_WORD0_FILEID; + ret = NFS_ATTR_FATTR_FILEID; } dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); - return 0; + return ret; } static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) { __be32 *p; + int ret = 0; *fileid = 0; if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U))) @@ -2321,9 +2329,10 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma READ_BUF(8); READ64(*fileid); bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + ret = NFS_ATTR_FATTR_FILEID; } dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); - return 0; + return ret; } static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) @@ -2479,6 +2488,8 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES) res->nlocations++; } + if (res->nlocations != 0) + status = NFS_ATTR_FATTR_V4_REFERRAL; out: dprintk("%s: fs_locations done, error = %d\n", __func__, status); return status; @@ -2580,26 +2591,30 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32 return status; } -static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *mode) +static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode) { + uint32_t tmp; __be32 *p; + int ret = 0; *mode = 0; if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U))) return -EIO; if (likely(bitmap[1] & FATTR4_WORD1_MODE)) { READ_BUF(4); - READ32(*mode); - *mode &= ~S_IFMT; + READ32(tmp); + *mode = tmp & ~S_IFMT; bitmap[1] &= ~FATTR4_WORD1_MODE; + ret = NFS_ATTR_FATTR_MODE; } dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode); - return 0; + return ret; } static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink) { __be32 *p; + int ret = 0; *nlink = 1; if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U))) @@ -2608,15 +2623,17 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t READ_BUF(4); READ32(*nlink); bitmap[1] &= ~FATTR4_WORD1_NUMLINKS; + ret = NFS_ATTR_FATTR_NLINK; } dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink); - return 0; + return ret; } static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid) { uint32_t len; __be32 *p; + int ret = 0; *uid = -2; if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) @@ -2626,7 +2643,9 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf READ32(len); READ_BUF(len); if (len < XDR_MAX_NETOBJ) { - if (nfs_map_name_to_uid(clp, (char *)p, len, uid) != 0) + if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0) + ret = NFS_ATTR_FATTR_OWNER; + else dprintk("%s: nfs_map_name_to_uid failed!\n", __func__); } else @@ -2635,13 +2654,14 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf bitmap[1] &= ~FATTR4_WORD1_OWNER; } dprintk("%s: uid=%d\n", __func__, (int)*uid); - return 0; + return ret; } static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid) { uint32_t len; __be32 *p; + int ret = 0; *gid = -2; if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) @@ -2651,7 +2671,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf READ32(len); READ_BUF(len); if (len < XDR_MAX_NETOBJ) { - if (nfs_map_group_to_gid(clp, (char *)p, len, gid) != 0) + if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0) + ret = NFS_ATTR_FATTR_GROUP; + else dprintk("%s: nfs_map_group_to_gid failed!\n", __func__); } else @@ -2660,13 +2682,14 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; } dprintk("%s: gid=%d\n", __func__, (int)*gid); - return 0; + return ret; } static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev) { uint32_t major = 0, minor = 0; __be32 *p; + int ret = 0; *rdev = MKDEV(0,0); if (unlikely(bitmap[1] & (FATTR4_WORD1_RAWDEV - 1U))) @@ -2681,9 +2704,10 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde if (MAJOR(tmp) == major && MINOR(tmp) == minor) *rdev = tmp; bitmap[1] &= ~ FATTR4_WORD1_RAWDEV; + ret = NFS_ATTR_FATTR_RDEV; } dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor); - return 0; + return ret; } static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) @@ -2740,6 +2764,7 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used) { __be32 *p; + int ret = 0; *used = 0; if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U))) @@ -2748,10 +2773,11 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint READ_BUF(8); READ64(*used); bitmap[1] &= ~FATTR4_WORD1_SPACE_USED; + ret = NFS_ATTR_FATTR_SPACE_USED; } dprintk("%s: space used=%Lu\n", __func__, (unsigned long long)*used); - return 0; + return ret; } static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) @@ -2778,6 +2804,8 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str return -EIO; if (likely(bitmap[1] & FATTR4_WORD1_TIME_ACCESS)) { status = decode_attr_time(xdr, time); + if (status == 0) + status = NFS_ATTR_FATTR_ATIME; bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS; } dprintk("%s: atime=%ld\n", __func__, (long)time->tv_sec); @@ -2794,6 +2822,8 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s return -EIO; if (likely(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) { status = decode_attr_time(xdr, time); + if (status == 0) + status = NFS_ATTR_FATTR_CTIME; bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA; } dprintk("%s: ctime=%ld\n", __func__, (long)time->tv_sec); @@ -2810,6 +2840,8 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str return -EIO; if (likely(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) { status = decode_attr_time(xdr, time); + if (status == 0) + status = NFS_ATTR_FATTR_MTIME; bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY; } dprintk("%s: mtime=%ld\n", __func__, (long)time->tv_sec); @@ -2994,63 +3026,116 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons uint32_t attrlen, bitmap[2] = {0}, type; - int status, fmode = 0; + int status; + umode_t fmode = 0; uint64_t fileid; - if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) - goto xdr_error; - if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + status = decode_op_hdr(xdr, OP_GETATTR); + if (status < 0) goto xdr_error; - fattr->bitmap[0] = bitmap[0]; - fattr->bitmap[1] = bitmap[1]; + status = decode_attr_bitmap(xdr, bitmap); + if (status < 0) + goto xdr_error; - if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + status = decode_attr_length(xdr, &attrlen, &savep); + if (status < 0) goto xdr_error; - if ((status = decode_attr_type(xdr, bitmap, &type)) != 0) + status = decode_attr_type(xdr, bitmap, &type); + if (status < 0) goto xdr_error; - fattr->type = nfs_type2fmt[type].nfs2type; - fmode = nfs_type2fmt[type].mode; + fattr->mode = 0; + if (status != 0) { + fattr->mode |= nfs_type2fmt[type]; + fattr->valid |= status; + } - if ((status = decode_attr_change(xdr, bitmap, &fattr->change_attr)) != 0) + status = decode_attr_change(xdr, bitmap, &fattr->change_attr); + if (status < 0) goto xdr_error; - if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0) + fattr->valid |= status; + + status = decode_attr_size(xdr, bitmap, &fattr->size); + if (status < 0) goto xdr_error; - if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid)) != 0) + fattr->valid |= status; + + status = decode_attr_fsid(xdr, bitmap, &fattr->fsid); + if (status < 0) goto xdr_error; - if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0) + fattr->valid |= status; + + status = decode_attr_fileid(xdr, bitmap, &fattr->fileid); + if (status < 0) goto xdr_error; - if ((status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, + fattr->valid |= status; + + status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, struct nfs4_fs_locations, - fattr))) != 0) + fattr)); + if (status < 0) goto xdr_error; - if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0) + fattr->valid |= status; + + status = decode_attr_mode(xdr, bitmap, &fmode); + if (status < 0) goto xdr_error; - fattr->mode |= fmode; - if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0) + if (status != 0) { + fattr->mode |= fmode; + fattr->valid |= status; + } + + status = decode_attr_nlink(xdr, bitmap, &fattr->nlink); + if (status < 0) goto xdr_error; - if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0) + fattr->valid |= status; + + status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid); + if (status < 0) goto xdr_error; - if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0) + fattr->valid |= status; + + status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid); + if (status < 0) goto xdr_error; - if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0) + fattr->valid |= status; + + status = decode_attr_rdev(xdr, bitmap, &fattr->rdev); + if (status < 0) goto xdr_error; - if ((status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used)) != 0) + fattr->valid |= status; + + status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used); + if (status < 0) goto xdr_error; - if ((status = decode_attr_time_access(xdr, bitmap, &fattr->atime)) != 0) + fattr->valid |= status; + + status = decode_attr_time_access(xdr, bitmap, &fattr->atime); + if (status < 0) goto xdr_error; - if ((status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime)) != 0) + fattr->valid |= status; + + status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime); + if (status < 0) goto xdr_error; - if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0) + fattr->valid |= status; + + status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime); + if (status < 0) goto xdr_error; - if ((status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid)) != 0) + fattr->valid |= status; + + status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid); + if (status < 0) goto xdr_error; - if (fattr->fileid == 0 && fileid != 0) + if (status != 0 && !(fattr->valid & status)) { fattr->fileid = fileid; - if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) - fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4; + fattr->valid |= status; + } + + status = verify_attr_len(xdr, savep, attrlen); xdr_error: dprintk("%s: xdr returned %d\n", __func__, -status); return status; @@ -4078,9 +4163,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se status = decode_setattr(&xdr, res); if (status) goto out; - status = decode_getfattr(&xdr, res->fattr, res->server); - if (status == NFS4ERR_DELAY) - status = 0; + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7f079209d70a..e2975939126a 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -176,17 +176,6 @@ void nfs_release_request(struct nfs_page *req) kref_put(&req->wb_kref, nfs_free_request); } -static int nfs_wait_bit_killable(void *word) -{ - int ret = 0; - - if (fatal_signal_pending(current)) - ret = -ERESTARTSYS; - else - schedule(); - return ret; -} - /** * nfs_wait_on_request - Wait for a request to complete. * @req: request to wait upon. diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 193465210d7c..7be72d90d49d 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -663,4 +663,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .commit_setup = nfs_proc_commit_setup, .lock = nfs_proc_lock, .lock_check_bounds = nfs_lock_check_bounds, + .close_context = nfs_close_context, }; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d6686f4786dc..0942fcbbad3c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1018,6 +1018,7 @@ static int nfs_parse_mount_options(char *raw, case Opt_rdma: mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + xprt_load_transport(p); break; case Opt_acl: mnt->flags &= ~NFS_MOUNT_NOACL; @@ -1205,12 +1206,14 @@ static int nfs_parse_mount_options(char *raw, /* vector side protocols to TCP */ mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + xprt_load_transport(string); break; default: errors++; dfprintk(MOUNT, "NFS: unrecognized " "transport protocol\n"); } + kfree(string); break; case Opt_mountproto: string = match_strdup(args); @@ -1218,7 +1221,6 @@ static int nfs_parse_mount_options(char *raw, goto out_nomem; token = match_token(string, nfs_xprt_protocol_tokens, args); - kfree(string); switch (token) { case Opt_xprt_udp: diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9f9845859fc1..e560a78995a3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -313,19 +313,34 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control * int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; + unsigned long *bitlock = &NFS_I(inode)->flags; struct nfs_pageio_descriptor pgio; int err; + /* Stop dirtying of new pages while we sync */ + err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING, + nfs_wait_bit_killable, TASK_KILLABLE); + if (err) + goto out_err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); + + clear_bit_unlock(NFS_INO_FLUSHING, bitlock); + smp_mb__after_clear_bit(); + wake_up_bit(bitlock, NFS_INO_FLUSHING); + if (err < 0) - return err; - if (pgio.pg_error < 0) - return pgio.pg_error; + goto out_err; + err = pgio.pg_error; + if (err < 0) + goto out_err; return 0; +out_err: + return err; } /* @@ -404,7 +419,6 @@ nfs_mark_request_commit(struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); - nfsi->ncommit++; set_bit(PG_CLEAN, &(req)->wb_flags); radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, @@ -524,6 +538,12 @@ static void nfs_cancel_commit_list(struct list_head *head) } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +static int +nfs_need_commit(struct nfs_inode *nfsi) +{ + return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT); +} + /* * nfs_scan_commit - Scan an inode for commit requests * @inode: NFS inode to scan @@ -538,16 +558,18 @@ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res = 0; - if (nfsi->ncommit != 0) { - res = nfs_scan_list(nfsi, dst, idx_start, npages, - NFS_PAGE_TAG_COMMIT); - nfsi->ncommit -= res; - } - return res; + if (!nfs_need_commit(nfsi)) + return 0; + + return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); } #else +static inline int nfs_need_commit(struct nfs_inode *nfsi) +{ + return 0; +} + static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { return 0; @@ -820,7 +842,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->args.stable = NFS_UNSTABLE; if (how & FLUSH_STABLE) { data->args.stable = NFS_DATA_SYNC; - if (!NFS_I(inode)->ncommit) + if (!nfs_need_commit(NFS_I(inode))) data->args.stable = NFS_FILE_SYNC; } @@ -1425,18 +1447,13 @@ static int nfs_write_mapping(struct address_space *mapping, int how) { struct writeback_control wbc = { .bdi = mapping->backing_dev_info, - .sync_mode = WB_SYNC_NONE, + .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_start = 0, .range_end = LLONG_MAX, .for_writepages = 1, }; - int ret; - ret = __nfs_write_mapping(mapping, &wbc, how); - if (ret < 0) - return ret; - wbc.sync_mode = WB_SYNC_ALL; return __nfs_write_mapping(mapping, &wbc, how); } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3d93b2064ce5..a4ed8644d69c 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -938,10 +938,12 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) char transport[16]; int port; if (sscanf(buf, "%15s %4d", transport, &port) == 2) { + if (port < 1 || port > 65535) + return -EINVAL; err = nfsd_create_serv(); if (!err) { err = svc_create_xprt(nfsd_serv, - transport, port, + transport, PF_INET, port, SVC_SOCK_ANONYMOUS); if (err == -ENOENT) /* Give a reasonable perror msg for @@ -960,7 +962,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) char transport[16]; int port; if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) { - if (port == 0) + if (port < 1 || port > 65535) return -EINVAL; if (nfsd_serv) { xprt = svc_find_xprt(nfsd_serv, transport, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 07e4f5d7baa8..bc3567bab8c4 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -229,7 +229,6 @@ int nfsd_create_serv(void) atomic_set(&nfsd_busy, 0); nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - AF_INET, nfsd_last_thread, nfsd, THIS_MODULE); if (nfsd_serv == NULL) err = -ENOMEM; @@ -244,7 +243,7 @@ static int nfsd_init_socks(int port) if (!list_empty(&nfsd_serv->sv_permsocks)) return 0; - error = svc_create_xprt(nfsd_serv, "udp", port, + error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port, SVC_SOCK_DEFAULTS); if (error < 0) return error; @@ -253,7 +252,7 @@ static int nfsd_init_socks(int port) if (error < 0) return error; - error = svc_create_xprt(nfsd_serv, "tcp", port, + error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port, SVC_SOCK_DEFAULTS); if (error < 0) return error; diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 34314b33dbd4..5a9e34475e37 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -32,8 +32,8 @@ /** * The little endian Unicode string $I30 as a global constant. */ -ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'), - const_cpu_to_le16('3'), const_cpu_to_le16('0'), 0 }; +ntfschar I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'), + cpu_to_le16('3'), cpu_to_le16('0'), 0 }; /** * ntfs_lookup_inode_by_name - find an inode in a directory given its name diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 86bef156cf0a..82c5085559c6 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1975,8 +1975,7 @@ int ntfs_read_inode_mount(struct inode *vi) goto em_put_err_out; next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry + le16_to_cpu(al_entry->length)); - if (le32_to_cpu(al_entry->type) > - const_le32_to_cpu(AT_DATA)) + if (le32_to_cpu(al_entry->type) > le32_to_cpu(AT_DATA)) goto em_put_err_out; if (AT_DATA != al_entry->type) continue; diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index 1e383328eceb..50931b1ce4b9 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h @@ -31,19 +31,8 @@ #include "types.h" -/* - * Constant endianness conversion defines. - */ -#define const_le16_to_cpu(x) __constant_le16_to_cpu(x) -#define const_le32_to_cpu(x) __constant_le32_to_cpu(x) -#define const_le64_to_cpu(x) __constant_le64_to_cpu(x) - -#define const_cpu_to_le16(x) __constant_cpu_to_le16(x) -#define const_cpu_to_le32(x) __constant_cpu_to_le32(x) -#define const_cpu_to_le64(x) __constant_cpu_to_le64(x) - /* The NTFS oem_id "NTFS " */ -#define magicNTFS const_cpu_to_le64(0x202020205346544eULL) +#define magicNTFS cpu_to_le64(0x202020205346544eULL) /* * Location of bootsector on partition: @@ -114,25 +103,25 @@ typedef struct { */ enum { /* Found in $MFT/$DATA. */ - magic_FILE = const_cpu_to_le32(0x454c4946), /* Mft entry. */ - magic_INDX = const_cpu_to_le32(0x58444e49), /* Index buffer. */ - magic_HOLE = const_cpu_to_le32(0x454c4f48), /* ? (NTFS 3.0+?) */ + magic_FILE = cpu_to_le32(0x454c4946), /* Mft entry. */ + magic_INDX = cpu_to_le32(0x58444e49), /* Index buffer. */ + magic_HOLE = cpu_to_le32(0x454c4f48), /* ? (NTFS 3.0+?) */ /* Found in $LogFile/$DATA. */ - magic_RSTR = const_cpu_to_le32(0x52545352), /* Restart page. */ - magic_RCRD = const_cpu_to_le32(0x44524352), /* Log record page. */ + magic_RSTR = cpu_to_le32(0x52545352), /* Restart page. */ + magic_RCRD = cpu_to_le32(0x44524352), /* Log record page. */ /* Found in $LogFile/$DATA. (May be found in $MFT/$DATA, also?) */ - magic_CHKD = const_cpu_to_le32(0x444b4843), /* Modified by chkdsk. */ + magic_CHKD = cpu_to_le32(0x444b4843), /* Modified by chkdsk. */ /* Found in all ntfs record containing records. */ - magic_BAAD = const_cpu_to_le32(0x44414142), /* Failed multi sector + magic_BAAD = cpu_to_le32(0x44414142), /* Failed multi sector transfer was detected. */ /* * Found in $LogFile/$DATA when a page is full of 0xff bytes and is * thus not initialized. Page must be initialized before using it. */ - magic_empty = const_cpu_to_le32(0xffffffff) /* Record is empty. */ + magic_empty = cpu_to_le32(0xffffffff) /* Record is empty. */ }; typedef le32 NTFS_RECORD_TYPE; @@ -258,8 +247,8 @@ typedef enum { * information about the mft record in which they are present. */ enum { - MFT_RECORD_IN_USE = const_cpu_to_le16(0x0001), - MFT_RECORD_IS_DIRECTORY = const_cpu_to_le16(0x0002), + MFT_RECORD_IN_USE = cpu_to_le16(0x0001), + MFT_RECORD_IS_DIRECTORY = cpu_to_le16(0x0002), } __attribute__ ((__packed__)); typedef le16 MFT_RECORD_FLAGS; @@ -309,7 +298,7 @@ typedef le16 MFT_RECORD_FLAGS; * Note: The _LE versions will return a CPU endian formatted value! */ #define MFT_REF_MASK_CPU 0x0000ffffffffffffULL -#define MFT_REF_MASK_LE const_cpu_to_le64(MFT_REF_MASK_CPU) +#define MFT_REF_MASK_LE cpu_to_le64(MFT_REF_MASK_CPU) typedef u64 MFT_REF; typedef le64 leMFT_REF; @@ -477,25 +466,25 @@ typedef struct { * a revealing choice of symbol I do not know what is... (-; */ enum { - AT_UNUSED = const_cpu_to_le32( 0), - AT_STANDARD_INFORMATION = const_cpu_to_le32( 0x10), - AT_ATTRIBUTE_LIST = const_cpu_to_le32( 0x20), - AT_FILE_NAME = const_cpu_to_le32( 0x30), - AT_OBJECT_ID = const_cpu_to_le32( 0x40), - AT_SECURITY_DESCRIPTOR = const_cpu_to_le32( 0x50), - AT_VOLUME_NAME = const_cpu_to_le32( 0x60), - AT_VOLUME_INFORMATION = const_cpu_to_le32( 0x70), - AT_DATA = const_cpu_to_le32( 0x80), - AT_INDEX_ROOT = const_cpu_to_le32( 0x90), - AT_INDEX_ALLOCATION = const_cpu_to_le32( 0xa0), - AT_BITMAP = const_cpu_to_le32( 0xb0), - AT_REPARSE_POINT = const_cpu_to_le32( 0xc0), - AT_EA_INFORMATION = const_cpu_to_le32( 0xd0), - AT_EA = const_cpu_to_le32( 0xe0), - AT_PROPERTY_SET = const_cpu_to_le32( 0xf0), - AT_LOGGED_UTILITY_STREAM = const_cpu_to_le32( 0x100), - AT_FIRST_USER_DEFINED_ATTRIBUTE = const_cpu_to_le32( 0x1000), - AT_END = const_cpu_to_le32(0xffffffff) + AT_UNUSED = cpu_to_le32( 0), + AT_STANDARD_INFORMATION = cpu_to_le32( 0x10), + AT_ATTRIBUTE_LIST = cpu_to_le32( 0x20), + AT_FILE_NAME = cpu_to_le32( 0x30), + AT_OBJECT_ID = cpu_to_le32( 0x40), + AT_SECURITY_DESCRIPTOR = cpu_to_le32( 0x50), + AT_VOLUME_NAME = cpu_to_le32( 0x60), + AT_VOLUME_INFORMATION = cpu_to_le32( 0x70), + AT_DATA = cpu_to_le32( 0x80), + AT_INDEX_ROOT = cpu_to_le32( 0x90), + AT_INDEX_ALLOCATION = cpu_to_le32( 0xa0), + AT_BITMAP = cpu_to_le32( 0xb0), + AT_REPARSE_POINT = cpu_to_le32( 0xc0), + AT_EA_INFORMATION = cpu_to_le32( 0xd0), + AT_EA = cpu_to_le32( 0xe0), + AT_PROPERTY_SET = cpu_to_le32( 0xf0), + AT_LOGGED_UTILITY_STREAM = cpu_to_le32( 0x100), + AT_FIRST_USER_DEFINED_ATTRIBUTE = cpu_to_le32( 0x1000), + AT_END = cpu_to_le32(0xffffffff) }; typedef le32 ATTR_TYPE; @@ -539,13 +528,13 @@ typedef le32 ATTR_TYPE; * equal then the second le32 values would be compared, etc. */ enum { - COLLATION_BINARY = const_cpu_to_le32(0x00), - COLLATION_FILE_NAME = const_cpu_to_le32(0x01), - COLLATION_UNICODE_STRING = const_cpu_to_le32(0x02), - COLLATION_NTOFS_ULONG = const_cpu_to_le32(0x10), - COLLATION_NTOFS_SID = const_cpu_to_le32(0x11), - COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(0x12), - COLLATION_NTOFS_ULONGS = const_cpu_to_le32(0x13), + COLLATION_BINARY = cpu_to_le32(0x00), + COLLATION_FILE_NAME = cpu_to_le32(0x01), + COLLATION_UNICODE_STRING = cpu_to_le32(0x02), + COLLATION_NTOFS_ULONG = cpu_to_le32(0x10), + COLLATION_NTOFS_SID = cpu_to_le32(0x11), + COLLATION_NTOFS_SECURITY_HASH = cpu_to_le32(0x12), + COLLATION_NTOFS_ULONGS = cpu_to_le32(0x13), }; typedef le32 COLLATION_RULE; @@ -559,25 +548,25 @@ typedef le32 COLLATION_RULE; * NT4. */ enum { - ATTR_DEF_INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be + ATTR_DEF_INDEXABLE = cpu_to_le32(0x02), /* Attribute can be indexed. */ - ATTR_DEF_MULTIPLE = const_cpu_to_le32(0x04), /* Attribute type + ATTR_DEF_MULTIPLE = cpu_to_le32(0x04), /* Attribute type can be present multiple times in the mft records of an inode. */ - ATTR_DEF_NOT_ZERO = const_cpu_to_le32(0x08), /* Attribute value + ATTR_DEF_NOT_ZERO = cpu_to_le32(0x08), /* Attribute value must contain at least one non-zero byte. */ - ATTR_DEF_INDEXED_UNIQUE = const_cpu_to_le32(0x10), /* Attribute must be + ATTR_DEF_INDEXED_UNIQUE = cpu_to_le32(0x10), /* Attribute must be indexed and the attribute value must be unique for the attribute type in all of the mft records of an inode. */ - ATTR_DEF_NAMED_UNIQUE = const_cpu_to_le32(0x20), /* Attribute must be + ATTR_DEF_NAMED_UNIQUE = cpu_to_le32(0x20), /* Attribute must be named and the name must be unique for the attribute type in all of the mft records of an inode. */ - ATTR_DEF_RESIDENT = const_cpu_to_le32(0x40), /* Attribute must be + ATTR_DEF_RESIDENT = cpu_to_le32(0x40), /* Attribute must be resident. */ - ATTR_DEF_ALWAYS_LOG = const_cpu_to_le32(0x80), /* Always log + ATTR_DEF_ALWAYS_LOG = cpu_to_le32(0x80), /* Always log modifications to this attribute, regardless of whether it is resident or non-resident. Without this, only log @@ -614,12 +603,12 @@ typedef struct { * Attribute flags (16-bit). */ enum { - ATTR_IS_COMPRESSED = const_cpu_to_le16(0x0001), - ATTR_COMPRESSION_MASK = const_cpu_to_le16(0x00ff), /* Compression method + ATTR_IS_COMPRESSED = cpu_to_le16(0x0001), + ATTR_COMPRESSION_MASK = cpu_to_le16(0x00ff), /* Compression method mask. Also, first illegal value. */ - ATTR_IS_ENCRYPTED = const_cpu_to_le16(0x4000), - ATTR_IS_SPARSE = const_cpu_to_le16(0x8000), + ATTR_IS_ENCRYPTED = cpu_to_le16(0x4000), + ATTR_IS_SPARSE = cpu_to_le16(0x8000), } __attribute__ ((__packed__)); typedef le16 ATTR_FLAGS; @@ -811,32 +800,32 @@ typedef ATTR_RECORD ATTR_REC; * flags appear in all of the above. */ enum { - FILE_ATTR_READONLY = const_cpu_to_le32(0x00000001), - FILE_ATTR_HIDDEN = const_cpu_to_le32(0x00000002), - FILE_ATTR_SYSTEM = const_cpu_to_le32(0x00000004), - /* Old DOS volid. Unused in NT. = const_cpu_to_le32(0x00000008), */ + FILE_ATTR_READONLY = cpu_to_le32(0x00000001), + FILE_ATTR_HIDDEN = cpu_to_le32(0x00000002), + FILE_ATTR_SYSTEM = cpu_to_le32(0x00000004), + /* Old DOS volid. Unused in NT. = cpu_to_le32(0x00000008), */ - FILE_ATTR_DIRECTORY = const_cpu_to_le32(0x00000010), + FILE_ATTR_DIRECTORY = cpu_to_le32(0x00000010), /* Note, FILE_ATTR_DIRECTORY is not considered valid in NT. It is reserved for the DOS SUBDIRECTORY flag. */ - FILE_ATTR_ARCHIVE = const_cpu_to_le32(0x00000020), - FILE_ATTR_DEVICE = const_cpu_to_le32(0x00000040), - FILE_ATTR_NORMAL = const_cpu_to_le32(0x00000080), + FILE_ATTR_ARCHIVE = cpu_to_le32(0x00000020), + FILE_ATTR_DEVICE = cpu_to_le32(0x00000040), + FILE_ATTR_NORMAL = cpu_to_le32(0x00000080), - FILE_ATTR_TEMPORARY = const_cpu_to_le32(0x00000100), - FILE_ATTR_SPARSE_FILE = const_cpu_to_le32(0x00000200), - FILE_ATTR_REPARSE_POINT = const_cpu_to_le32(0x00000400), - FILE_ATTR_COMPRESSED = const_cpu_to_le32(0x00000800), + FILE_ATTR_TEMPORARY = cpu_to_le32(0x00000100), + FILE_ATTR_SPARSE_FILE = cpu_to_le32(0x00000200), + FILE_ATTR_REPARSE_POINT = cpu_to_le32(0x00000400), + FILE_ATTR_COMPRESSED = cpu_to_le32(0x00000800), - FILE_ATTR_OFFLINE = const_cpu_to_le32(0x00001000), - FILE_ATTR_NOT_CONTENT_INDEXED = const_cpu_to_le32(0x00002000), - FILE_ATTR_ENCRYPTED = const_cpu_to_le32(0x00004000), + FILE_ATTR_OFFLINE = cpu_to_le32(0x00001000), + FILE_ATTR_NOT_CONTENT_INDEXED = cpu_to_le32(0x00002000), + FILE_ATTR_ENCRYPTED = cpu_to_le32(0x00004000), - FILE_ATTR_VALID_FLAGS = const_cpu_to_le32(0x00007fb7), + FILE_ATTR_VALID_FLAGS = cpu_to_le32(0x00007fb7), /* Note, FILE_ATTR_VALID_FLAGS masks out the old DOS VolId and the FILE_ATTR_DEVICE and preserves everything else. This mask is used to obtain all flags that are valid for reading. */ - FILE_ATTR_VALID_SET_FLAGS = const_cpu_to_le32(0x000031a7), + FILE_ATTR_VALID_SET_FLAGS = cpu_to_le32(0x000031a7), /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT, F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask @@ -846,11 +835,11 @@ enum { * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION * attribute of an mft record. */ - FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = const_cpu_to_le32(0x10000000), + FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = cpu_to_le32(0x10000000), /* Note, this is a copy of the corresponding bit from the mft record, telling us whether this is a directory or not, i.e. whether it has an index root attribute or not. */ - FILE_ATTR_DUP_VIEW_INDEX_PRESENT = const_cpu_to_le32(0x20000000), + FILE_ATTR_DUP_VIEW_INDEX_PRESENT = cpu_to_le32(0x20000000), /* Note, this is a copy of the corresponding bit from the mft record, telling us whether this file has a view index present (eg. object id index, quota index, one of the security indexes or the encrypting @@ -1446,42 +1435,42 @@ enum { /* Specific rights for files and directories are as follows: */ /* Right to read data from the file. (FILE) */ - FILE_READ_DATA = const_cpu_to_le32(0x00000001), + FILE_READ_DATA = cpu_to_le32(0x00000001), /* Right to list contents of a directory. (DIRECTORY) */ - FILE_LIST_DIRECTORY = const_cpu_to_le32(0x00000001), + FILE_LIST_DIRECTORY = cpu_to_le32(0x00000001), /* Right to write data to the file. (FILE) */ - FILE_WRITE_DATA = const_cpu_to_le32(0x00000002), + FILE_WRITE_DATA = cpu_to_le32(0x00000002), /* Right to create a file in the directory. (DIRECTORY) */ - FILE_ADD_FILE = const_cpu_to_le32(0x00000002), + FILE_ADD_FILE = cpu_to_le32(0x00000002), /* Right to append data to the file. (FILE) */ - FILE_APPEND_DATA = const_cpu_to_le32(0x00000004), + FILE_APPEND_DATA = cpu_to_le32(0x00000004), /* Right to create a subdirectory. (DIRECTORY) */ - FILE_ADD_SUBDIRECTORY = const_cpu_to_le32(0x00000004), + FILE_ADD_SUBDIRECTORY = cpu_to_le32(0x00000004), /* Right to read extended attributes. (FILE/DIRECTORY) */ - FILE_READ_EA = const_cpu_to_le32(0x00000008), + FILE_READ_EA = cpu_to_le32(0x00000008), /* Right to write extended attributes. (FILE/DIRECTORY) */ - FILE_WRITE_EA = const_cpu_to_le32(0x00000010), + FILE_WRITE_EA = cpu_to_le32(0x00000010), /* Right to execute a file. (FILE) */ - FILE_EXECUTE = const_cpu_to_le32(0x00000020), + FILE_EXECUTE = cpu_to_le32(0x00000020), /* Right to traverse the directory. (DIRECTORY) */ - FILE_TRAVERSE = const_cpu_to_le32(0x00000020), + FILE_TRAVERSE = cpu_to_le32(0x00000020), /* * Right to delete a directory and all the files it contains (its * children), even if the files are read-only. (DIRECTORY) */ - FILE_DELETE_CHILD = const_cpu_to_le32(0x00000040), + FILE_DELETE_CHILD = cpu_to_le32(0x00000040), /* Right to read file attributes. (FILE/DIRECTORY) */ - FILE_READ_ATTRIBUTES = const_cpu_to_le32(0x00000080), + FILE_READ_ATTRIBUTES = cpu_to_le32(0x00000080), /* Right to change file attributes. (FILE/DIRECTORY) */ - FILE_WRITE_ATTRIBUTES = const_cpu_to_le32(0x00000100), + FILE_WRITE_ATTRIBUTES = cpu_to_le32(0x00000100), /* * The standard rights (bits 16 to 23). These are independent of the @@ -1489,27 +1478,27 @@ enum { */ /* Right to delete the object. */ - DELETE = const_cpu_to_le32(0x00010000), + DELETE = cpu_to_le32(0x00010000), /* * Right to read the information in the object's security descriptor, * not including the information in the SACL, i.e. right to read the * security descriptor and owner. */ - READ_CONTROL = const_cpu_to_le32(0x00020000), + READ_CONTROL = cpu_to_le32(0x00020000), /* Right to modify the DACL in the object's security descriptor. */ - WRITE_DAC = const_cpu_to_le32(0x00040000), + WRITE_DAC = cpu_to_le32(0x00040000), /* Right to change the owner in the object's security descriptor. */ - WRITE_OWNER = const_cpu_to_le32(0x00080000), + WRITE_OWNER = cpu_to_le32(0x00080000), /* * Right to use the object for synchronization. Enables a process to * wait until the object is in the signalled state. Some object types * do not support this access right. */ - SYNCHRONIZE = const_cpu_to_le32(0x00100000), + SYNCHRONIZE = cpu_to_le32(0x00100000), /* * The following STANDARD_RIGHTS_* are combinations of the above for @@ -1517,25 +1506,25 @@ enum { */ /* These are currently defined to READ_CONTROL. */ - STANDARD_RIGHTS_READ = const_cpu_to_le32(0x00020000), - STANDARD_RIGHTS_WRITE = const_cpu_to_le32(0x00020000), - STANDARD_RIGHTS_EXECUTE = const_cpu_to_le32(0x00020000), + STANDARD_RIGHTS_READ = cpu_to_le32(0x00020000), + STANDARD_RIGHTS_WRITE = cpu_to_le32(0x00020000), + STANDARD_RIGHTS_EXECUTE = cpu_to_le32(0x00020000), /* Combines DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER access. */ - STANDARD_RIGHTS_REQUIRED = const_cpu_to_le32(0x000f0000), + STANDARD_RIGHTS_REQUIRED = cpu_to_le32(0x000f0000), /* * Combines DELETE, READ_CONTROL, WRITE_DAC, WRITE_OWNER, and * SYNCHRONIZE access. */ - STANDARD_RIGHTS_ALL = const_cpu_to_le32(0x001f0000), + STANDARD_RIGHTS_ALL = cpu_to_le32(0x001f0000), /* * The access system ACL and maximum allowed access types (bits 24 to * 25, bits 26 to 27 are reserved). */ - ACCESS_SYSTEM_SECURITY = const_cpu_to_le32(0x01000000), - MAXIMUM_ALLOWED = const_cpu_to_le32(0x02000000), + ACCESS_SYSTEM_SECURITY = cpu_to_le32(0x01000000), + MAXIMUM_ALLOWED = cpu_to_le32(0x02000000), /* * The generic rights (bits 28 to 31). These map onto the standard and @@ -1543,10 +1532,10 @@ enum { */ /* Read, write, and execute access. */ - GENERIC_ALL = const_cpu_to_le32(0x10000000), + GENERIC_ALL = cpu_to_le32(0x10000000), /* Execute access. */ - GENERIC_EXECUTE = const_cpu_to_le32(0x20000000), + GENERIC_EXECUTE = cpu_to_le32(0x20000000), /* * Write access. For files, this maps onto: @@ -1555,7 +1544,7 @@ enum { * For directories, the mapping has the same numerical value. See * above for the descriptions of the rights granted. */ - GENERIC_WRITE = const_cpu_to_le32(0x40000000), + GENERIC_WRITE = cpu_to_le32(0x40000000), /* * Read access. For files, this maps onto: @@ -1564,7 +1553,7 @@ enum { * For directories, the mapping has the same numberical value. See * above for the descriptions of the rights granted. */ - GENERIC_READ = const_cpu_to_le32(0x80000000), + GENERIC_READ = cpu_to_le32(0x80000000), }; typedef le32 ACCESS_MASK; @@ -1604,8 +1593,8 @@ typedef struct { * The object ACE flags (32-bit). */ enum { - ACE_OBJECT_TYPE_PRESENT = const_cpu_to_le32(1), - ACE_INHERITED_OBJECT_TYPE_PRESENT = const_cpu_to_le32(2), + ACE_OBJECT_TYPE_PRESENT = cpu_to_le32(1), + ACE_INHERITED_OBJECT_TYPE_PRESENT = cpu_to_le32(2), }; typedef le32 OBJECT_ACE_FLAGS; @@ -1706,23 +1695,23 @@ typedef enum { * expressed as offsets from the beginning of the security descriptor. */ enum { - SE_OWNER_DEFAULTED = const_cpu_to_le16(0x0001), - SE_GROUP_DEFAULTED = const_cpu_to_le16(0x0002), - SE_DACL_PRESENT = const_cpu_to_le16(0x0004), - SE_DACL_DEFAULTED = const_cpu_to_le16(0x0008), - - SE_SACL_PRESENT = const_cpu_to_le16(0x0010), - SE_SACL_DEFAULTED = const_cpu_to_le16(0x0020), - - SE_DACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0100), - SE_SACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0200), - SE_DACL_AUTO_INHERITED = const_cpu_to_le16(0x0400), - SE_SACL_AUTO_INHERITED = const_cpu_to_le16(0x0800), - - SE_DACL_PROTECTED = const_cpu_to_le16(0x1000), - SE_SACL_PROTECTED = const_cpu_to_le16(0x2000), - SE_RM_CONTROL_VALID = const_cpu_to_le16(0x4000), - SE_SELF_RELATIVE = const_cpu_to_le16(0x8000) + SE_OWNER_DEFAULTED = cpu_to_le16(0x0001), + SE_GROUP_DEFAULTED = cpu_to_le16(0x0002), + SE_DACL_PRESENT = cpu_to_le16(0x0004), + SE_DACL_DEFAULTED = cpu_to_le16(0x0008), + + SE_SACL_PRESENT = cpu_to_le16(0x0010), + SE_SACL_DEFAULTED = cpu_to_le16(0x0020), + + SE_DACL_AUTO_INHERIT_REQ = cpu_to_le16(0x0100), + SE_SACL_AUTO_INHERIT_REQ = cpu_to_le16(0x0200), + SE_DACL_AUTO_INHERITED = cpu_to_le16(0x0400), + SE_SACL_AUTO_INHERITED = cpu_to_le16(0x0800), + + SE_DACL_PROTECTED = cpu_to_le16(0x1000), + SE_SACL_PROTECTED = cpu_to_le16(0x2000), + SE_RM_CONTROL_VALID = cpu_to_le16(0x4000), + SE_SELF_RELATIVE = cpu_to_le16(0x8000) } __attribute__ ((__packed__)); typedef le16 SECURITY_DESCRIPTOR_CONTROL; @@ -1910,21 +1899,21 @@ typedef struct { * Possible flags for the volume (16-bit). */ enum { - VOLUME_IS_DIRTY = const_cpu_to_le16(0x0001), - VOLUME_RESIZE_LOG_FILE = const_cpu_to_le16(0x0002), - VOLUME_UPGRADE_ON_MOUNT = const_cpu_to_le16(0x0004), - VOLUME_MOUNTED_ON_NT4 = const_cpu_to_le16(0x0008), + VOLUME_IS_DIRTY = cpu_to_le16(0x0001), + VOLUME_RESIZE_LOG_FILE = cpu_to_le16(0x0002), + VOLUME_UPGRADE_ON_MOUNT = cpu_to_le16(0x0004), + VOLUME_MOUNTED_ON_NT4 = cpu_to_le16(0x0008), - VOLUME_DELETE_USN_UNDERWAY = const_cpu_to_le16(0x0010), - VOLUME_REPAIR_OBJECT_ID = const_cpu_to_le16(0x0020), + VOLUME_DELETE_USN_UNDERWAY = cpu_to_le16(0x0010), + VOLUME_REPAIR_OBJECT_ID = cpu_to_le16(0x0020), - VOLUME_CHKDSK_UNDERWAY = const_cpu_to_le16(0x4000), - VOLUME_MODIFIED_BY_CHKDSK = const_cpu_to_le16(0x8000), + VOLUME_CHKDSK_UNDERWAY = cpu_to_le16(0x4000), + VOLUME_MODIFIED_BY_CHKDSK = cpu_to_le16(0x8000), - VOLUME_FLAGS_MASK = const_cpu_to_le16(0xc03f), + VOLUME_FLAGS_MASK = cpu_to_le16(0xc03f), /* To make our life easier when checking if we must mount read-only. */ - VOLUME_MUST_MOUNT_RO_MASK = const_cpu_to_le16(0xc027), + VOLUME_MUST_MOUNT_RO_MASK = cpu_to_le16(0xc027), } __attribute__ ((__packed__)); typedef le16 VOLUME_FLAGS; @@ -2109,26 +2098,26 @@ typedef struct { * The user quota flags. Names explain meaning. */ enum { - QUOTA_FLAG_DEFAULT_LIMITS = const_cpu_to_le32(0x00000001), - QUOTA_FLAG_LIMIT_REACHED = const_cpu_to_le32(0x00000002), - QUOTA_FLAG_ID_DELETED = const_cpu_to_le32(0x00000004), + QUOTA_FLAG_DEFAULT_LIMITS = cpu_to_le32(0x00000001), + QUOTA_FLAG_LIMIT_REACHED = cpu_to_le32(0x00000002), + QUOTA_FLAG_ID_DELETED = cpu_to_le32(0x00000004), - QUOTA_FLAG_USER_MASK = const_cpu_to_le32(0x00000007), + QUOTA_FLAG_USER_MASK = cpu_to_le32(0x00000007), /* This is a bit mask for the user quota flags. */ /* * These flags are only present in the quota defaults index entry, i.e. * in the entry where owner_id = QUOTA_DEFAULTS_ID. */ - QUOTA_FLAG_TRACKING_ENABLED = const_cpu_to_le32(0x00000010), - QUOTA_FLAG_ENFORCEMENT_ENABLED = const_cpu_to_le32(0x00000020), - QUOTA_FLAG_TRACKING_REQUESTED = const_cpu_to_le32(0x00000040), - QUOTA_FLAG_LOG_THRESHOLD = const_cpu_to_le32(0x00000080), - - QUOTA_FLAG_LOG_LIMIT = const_cpu_to_le32(0x00000100), - QUOTA_FLAG_OUT_OF_DATE = const_cpu_to_le32(0x00000200), - QUOTA_FLAG_CORRUPT = const_cpu_to_le32(0x00000400), - QUOTA_FLAG_PENDING_DELETES = const_cpu_to_le32(0x00000800), + QUOTA_FLAG_TRACKING_ENABLED = cpu_to_le32(0x00000010), + QUOTA_FLAG_ENFORCEMENT_ENABLED = cpu_to_le32(0x00000020), + QUOTA_FLAG_TRACKING_REQUESTED = cpu_to_le32(0x00000040), + QUOTA_FLAG_LOG_THRESHOLD = cpu_to_le32(0x00000080), + + QUOTA_FLAG_LOG_LIMIT = cpu_to_le32(0x00000100), + QUOTA_FLAG_OUT_OF_DATE = cpu_to_le32(0x00000200), + QUOTA_FLAG_CORRUPT = cpu_to_le32(0x00000400), + QUOTA_FLAG_PENDING_DELETES = cpu_to_le32(0x00000800), }; typedef le32 QUOTA_FLAGS; @@ -2172,9 +2161,9 @@ typedef struct { * Predefined owner_id values (32-bit). */ enum { - QUOTA_INVALID_ID = const_cpu_to_le32(0x00000000), - QUOTA_DEFAULTS_ID = const_cpu_to_le32(0x00000001), - QUOTA_FIRST_USER_ID = const_cpu_to_le32(0x00000100), + QUOTA_INVALID_ID = cpu_to_le32(0x00000000), + QUOTA_DEFAULTS_ID = cpu_to_le32(0x00000001), + QUOTA_FIRST_USER_ID = cpu_to_le32(0x00000100), }; /* @@ -2189,14 +2178,14 @@ typedef enum { * Index entry flags (16-bit). */ enum { - INDEX_ENTRY_NODE = const_cpu_to_le16(1), /* This entry contains a + INDEX_ENTRY_NODE = cpu_to_le16(1), /* This entry contains a sub-node, i.e. a reference to an index block in form of a virtual cluster number (see below). */ - INDEX_ENTRY_END = const_cpu_to_le16(2), /* This signifies the last + INDEX_ENTRY_END = cpu_to_le16(2), /* This signifies the last entry in an index block. The index entry does not represent a file but it can point to a sub-node. */ - INDEX_ENTRY_SPACE_FILLER = const_cpu_to_le16(0xffff), /* gcc: Force + INDEX_ENTRY_SPACE_FILLER = cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16-bit. */ } __attribute__ ((__packed__)); @@ -2334,26 +2323,26 @@ typedef struct { * These are the predefined reparse point tags: */ enum { - IO_REPARSE_TAG_IS_ALIAS = const_cpu_to_le32(0x20000000), - IO_REPARSE_TAG_IS_HIGH_LATENCY = const_cpu_to_le32(0x40000000), - IO_REPARSE_TAG_IS_MICROSOFT = const_cpu_to_le32(0x80000000), + IO_REPARSE_TAG_IS_ALIAS = cpu_to_le32(0x20000000), + IO_REPARSE_TAG_IS_HIGH_LATENCY = cpu_to_le32(0x40000000), + IO_REPARSE_TAG_IS_MICROSOFT = cpu_to_le32(0x80000000), - IO_REPARSE_TAG_RESERVED_ZERO = const_cpu_to_le32(0x00000000), - IO_REPARSE_TAG_RESERVED_ONE = const_cpu_to_le32(0x00000001), - IO_REPARSE_TAG_RESERVED_RANGE = const_cpu_to_le32(0x00000001), + IO_REPARSE_TAG_RESERVED_ZERO = cpu_to_le32(0x00000000), + IO_REPARSE_TAG_RESERVED_ONE = cpu_to_le32(0x00000001), + IO_REPARSE_TAG_RESERVED_RANGE = cpu_to_le32(0x00000001), - IO_REPARSE_TAG_NSS = const_cpu_to_le32(0x68000005), - IO_REPARSE_TAG_NSS_RECOVER = const_cpu_to_le32(0x68000006), - IO_REPARSE_TAG_SIS = const_cpu_to_le32(0x68000007), - IO_REPARSE_TAG_DFS = const_cpu_to_le32(0x68000008), + IO_REPARSE_TAG_NSS = cpu_to_le32(0x68000005), + IO_REPARSE_TAG_NSS_RECOVER = cpu_to_le32(0x68000006), + IO_REPARSE_TAG_SIS = cpu_to_le32(0x68000007), + IO_REPARSE_TAG_DFS = cpu_to_le32(0x68000008), - IO_REPARSE_TAG_MOUNT_POINT = const_cpu_to_le32(0x88000003), + IO_REPARSE_TAG_MOUNT_POINT = cpu_to_le32(0x88000003), - IO_REPARSE_TAG_HSM = const_cpu_to_le32(0xa8000004), + IO_REPARSE_TAG_HSM = cpu_to_le32(0xa8000004), - IO_REPARSE_TAG_SYMBOLIC_LINK = const_cpu_to_le32(0xe8000000), + IO_REPARSE_TAG_SYMBOLIC_LINK = cpu_to_le32(0xe8000000), - IO_REPARSE_TAG_VALID_VALUES = const_cpu_to_le32(0xe000ffff), + IO_REPARSE_TAG_VALID_VALUES = cpu_to_le32(0xe000ffff), }; /* diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h index 9468e1c45ae3..b5a6f08bd35c 100644 --- a/fs/ntfs/logfile.h +++ b/fs/ntfs/logfile.h @@ -104,7 +104,7 @@ typedef struct { * in this particular client array. Also inside the client records themselves, * this means that there are no client records preceding or following this one. */ -#define LOGFILE_NO_CLIENT const_cpu_to_le16(0xffff) +#define LOGFILE_NO_CLIENT cpu_to_le16(0xffff) #define LOGFILE_NO_CLIENT_CPU 0xffff /* @@ -112,8 +112,8 @@ typedef struct { * information about the log file in which they are present. */ enum { - RESTART_VOLUME_IS_CLEAN = const_cpu_to_le16(0x0002), - RESTART_SPACE_FILLER = const_cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */ + RESTART_VOLUME_IS_CLEAN = cpu_to_le16(0x0002), + RESTART_SPACE_FILLER = cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */ } __attribute__ ((__packed__)); typedef le16 RESTART_AREA_FLAGS; diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 17d32ca6bc35..23bf68453d7d 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -2839,7 +2839,7 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m) */ /* Mark the mft record as not in use. */ - m->flags &= const_cpu_to_le16(~const_le16_to_cpu(MFT_RECORD_IN_USE)); + m->flags &= ~MFT_RECORD_IN_USE; /* Increment the sequence number, skipping zero, if it is not zero. */ old_seq_no = m->sequence_number; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 4a46743b5077..f76951dcd4a6 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -618,7 +618,7 @@ static bool is_boot_sector_ntfs(const struct super_block *sb, * many BIOSes will refuse to boot from a bootsector if the magic is * incorrect, so we emit a warning. */ - if (!silent && b->end_of_sector_marker != const_cpu_to_le16(0xaa55)) + if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55)) ntfs_warning(sb, "Invalid end of sector marker."); return true; not_ntfs: @@ -1242,13 +1242,13 @@ static int check_windows_hibernation_status(ntfs_volume *vol) u32 *kaddr, *kend; ntfs_name *name = NULL; int ret = 1; - static const ntfschar hiberfil[13] = { const_cpu_to_le16('h'), - const_cpu_to_le16('i'), const_cpu_to_le16('b'), - const_cpu_to_le16('e'), const_cpu_to_le16('r'), - const_cpu_to_le16('f'), const_cpu_to_le16('i'), - const_cpu_to_le16('l'), const_cpu_to_le16('.'), - const_cpu_to_le16('s'), const_cpu_to_le16('y'), - const_cpu_to_le16('s'), 0 }; + static const ntfschar hiberfil[13] = { cpu_to_le16('h'), + cpu_to_le16('i'), cpu_to_le16('b'), + cpu_to_le16('e'), cpu_to_le16('r'), + cpu_to_le16('f'), cpu_to_le16('i'), + cpu_to_le16('l'), cpu_to_le16('.'), + cpu_to_le16('s'), cpu_to_le16('y'), + cpu_to_le16('s'), 0 }; ntfs_debug("Entering."); /* @@ -1296,7 +1296,7 @@ static int check_windows_hibernation_status(ntfs_volume *vol) goto iput_out; } kaddr = (u32*)page_address(page); - if (*(le32*)kaddr == const_cpu_to_le32(0x72626968)/*'hibr'*/) { + if (*(le32*)kaddr == cpu_to_le32(0x72626968)/*'hibr'*/) { ntfs_debug("Magic \"hibr\" found in hiberfil.sys. Windows is " "hibernated on the volume. This is the " "system volume."); @@ -1337,12 +1337,12 @@ static bool load_and_init_quota(ntfs_volume *vol) MFT_REF mref; struct inode *tmp_ino; ntfs_name *name = NULL; - static const ntfschar Quota[7] = { const_cpu_to_le16('$'), - const_cpu_to_le16('Q'), const_cpu_to_le16('u'), - const_cpu_to_le16('o'), const_cpu_to_le16('t'), - const_cpu_to_le16('a'), 0 }; - static ntfschar Q[3] = { const_cpu_to_le16('$'), - const_cpu_to_le16('Q'), 0 }; + static const ntfschar Quota[7] = { cpu_to_le16('$'), + cpu_to_le16('Q'), cpu_to_le16('u'), + cpu_to_le16('o'), cpu_to_le16('t'), + cpu_to_le16('a'), 0 }; + static ntfschar Q[3] = { cpu_to_le16('$'), + cpu_to_le16('Q'), 0 }; ntfs_debug("Entering."); /* @@ -1416,16 +1416,16 @@ static bool load_and_init_usnjrnl(ntfs_volume *vol) struct page *page; ntfs_name *name = NULL; USN_HEADER *uh; - static const ntfschar UsnJrnl[9] = { const_cpu_to_le16('$'), - const_cpu_to_le16('U'), const_cpu_to_le16('s'), - const_cpu_to_le16('n'), const_cpu_to_le16('J'), - const_cpu_to_le16('r'), const_cpu_to_le16('n'), - const_cpu_to_le16('l'), 0 }; - static ntfschar Max[5] = { const_cpu_to_le16('$'), - const_cpu_to_le16('M'), const_cpu_to_le16('a'), - const_cpu_to_le16('x'), 0 }; - static ntfschar J[3] = { const_cpu_to_le16('$'), - const_cpu_to_le16('J'), 0 }; + static const ntfschar UsnJrnl[9] = { cpu_to_le16('$'), + cpu_to_le16('U'), cpu_to_le16('s'), + cpu_to_le16('n'), cpu_to_le16('J'), + cpu_to_le16('r'), cpu_to_le16('n'), + cpu_to_le16('l'), 0 }; + static ntfschar Max[5] = { cpu_to_le16('$'), + cpu_to_le16('M'), cpu_to_le16('a'), + cpu_to_le16('x'), 0 }; + static ntfschar J[3] = { cpu_to_le16('$'), + cpu_to_le16('J'), 0 }; ntfs_debug("Entering."); /* diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h index 4087fbdac327..00d8e6bd7c36 100644 --- a/fs/ntfs/usnjrnl.h +++ b/fs/ntfs/usnjrnl.h @@ -116,27 +116,27 @@ typedef struct { * documentation: http://www.linux-ntfs.org/ */ enum { - USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), - USN_REASON_DATA_EXTEND = const_cpu_to_le32(0x00000002), - USN_REASON_DATA_TRUNCATION = const_cpu_to_le32(0x00000004), - USN_REASON_NAMED_DATA_OVERWRITE = const_cpu_to_le32(0x00000010), - USN_REASON_NAMED_DATA_EXTEND = const_cpu_to_le32(0x00000020), - USN_REASON_NAMED_DATA_TRUNCATION= const_cpu_to_le32(0x00000040), - USN_REASON_FILE_CREATE = const_cpu_to_le32(0x00000100), - USN_REASON_FILE_DELETE = const_cpu_to_le32(0x00000200), - USN_REASON_EA_CHANGE = const_cpu_to_le32(0x00000400), - USN_REASON_SECURITY_CHANGE = const_cpu_to_le32(0x00000800), - USN_REASON_RENAME_OLD_NAME = const_cpu_to_le32(0x00001000), - USN_REASON_RENAME_NEW_NAME = const_cpu_to_le32(0x00002000), - USN_REASON_INDEXABLE_CHANGE = const_cpu_to_le32(0x00004000), - USN_REASON_BASIC_INFO_CHANGE = const_cpu_to_le32(0x00008000), - USN_REASON_HARD_LINK_CHANGE = const_cpu_to_le32(0x00010000), - USN_REASON_COMPRESSION_CHANGE = const_cpu_to_le32(0x00020000), - USN_REASON_ENCRYPTION_CHANGE = const_cpu_to_le32(0x00040000), - USN_REASON_OBJECT_ID_CHANGE = const_cpu_to_le32(0x00080000), - USN_REASON_REPARSE_POINT_CHANGE = const_cpu_to_le32(0x00100000), - USN_REASON_STREAM_CHANGE = const_cpu_to_le32(0x00200000), - USN_REASON_CLOSE = const_cpu_to_le32(0x80000000), + USN_REASON_DATA_OVERWRITE = cpu_to_le32(0x00000001), + USN_REASON_DATA_EXTEND = cpu_to_le32(0x00000002), + USN_REASON_DATA_TRUNCATION = cpu_to_le32(0x00000004), + USN_REASON_NAMED_DATA_OVERWRITE = cpu_to_le32(0x00000010), + USN_REASON_NAMED_DATA_EXTEND = cpu_to_le32(0x00000020), + USN_REASON_NAMED_DATA_TRUNCATION= cpu_to_le32(0x00000040), + USN_REASON_FILE_CREATE = cpu_to_le32(0x00000100), + USN_REASON_FILE_DELETE = cpu_to_le32(0x00000200), + USN_REASON_EA_CHANGE = cpu_to_le32(0x00000400), + USN_REASON_SECURITY_CHANGE = cpu_to_le32(0x00000800), + USN_REASON_RENAME_OLD_NAME = cpu_to_le32(0x00001000), + USN_REASON_RENAME_NEW_NAME = cpu_to_le32(0x00002000), + USN_REASON_INDEXABLE_CHANGE = cpu_to_le32(0x00004000), + USN_REASON_BASIC_INFO_CHANGE = cpu_to_le32(0x00008000), + USN_REASON_HARD_LINK_CHANGE = cpu_to_le32(0x00010000), + USN_REASON_COMPRESSION_CHANGE = cpu_to_le32(0x00020000), + USN_REASON_ENCRYPTION_CHANGE = cpu_to_le32(0x00040000), + USN_REASON_OBJECT_ID_CHANGE = cpu_to_le32(0x00080000), + USN_REASON_REPARSE_POINT_CHANGE = cpu_to_le32(0x00100000), + USN_REASON_STREAM_CHANGE = cpu_to_le32(0x00200000), + USN_REASON_CLOSE = cpu_to_le32(0x80000000), }; typedef le32 USN_REASON_FLAGS; @@ -148,9 +148,9 @@ typedef le32 USN_REASON_FLAGS; * http://www.linux-ntfs.org/ */ enum { - USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), - USN_SOURCE_AUXILIARY_DATA = const_cpu_to_le32(0x00000002), - USN_SOURCE_REPLICATION_MANAGEMENT = const_cpu_to_le32(0x00000004), + USN_SOURCE_DATA_MANAGEMENT = cpu_to_le32(0x00000001), + USN_SOURCE_AUXILIARY_DATA = cpu_to_le32(0x00000002), + USN_SOURCE_REPLICATION_MANAGEMENT = cpu_to_le32(0x00000004), }; typedef le32 USN_SOURCE_INFO_FLAGS; diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index eea1d24713ea..b606496b72ec 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -154,8 +154,9 @@ out: return ret; } -static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct buffer_head *di_bh = NULL; sigset_t blocked, oldset; @@ -196,7 +197,8 @@ out: ret2 = ocfs2_vm_op_unblock_sigs(&oldset); if (ret2 < 0) mlog_errno(ret2); - + if (ret) + ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index 4a9e0f65ae60..83adcc869437 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -144,16 +144,12 @@ void proc_tty_register_driver(struct tty_driver *driver) { struct proc_dir_entry *ent; - if (!driver->ops->read_proc || !driver->driver_name || - driver->proc_entry) + if (!driver->driver_name || driver->proc_entry || + !driver->ops->proc_fops) return; - ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); - if (!ent) - return; - ent->read_proc = driver->ops->read_proc; - ent->data = driver; - + ent = proc_create_data(driver->driver_name, 0, proc_tty_driver, + driver->ops->proc_fops, driver); driver->proc_entry = ent; } diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 995ef1d6686c..ebb2c417912c 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -59,7 +59,6 @@ const struct inode_operations ramfs_file_inode_operations = { */ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) { - struct pagevec lru_pvec; unsigned long npages, xpages, loop, limit; struct page *pages; unsigned order; @@ -102,24 +101,20 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) memset(data, 0, newsize); /* attach all the pages to the inode's address space */ - pagevec_init(&lru_pvec, 0); for (loop = 0; loop < npages; loop++) { struct page *page = pages + loop; - ret = add_to_page_cache(page, inode->i_mapping, loop, GFP_KERNEL); + ret = add_to_page_cache_lru(page, inode->i_mapping, loop, + GFP_KERNEL); if (ret < 0) goto add_error; - if (!pagevec_add(&lru_pvec, page)) - __pagevec_lru_add_file(&lru_pvec); - /* prevent the page from being discarded on memory pressure */ SetPageDirty(page); unlock_page(page); } - pagevec_lru_add_file(&lru_pvec); return 0; fsize_exceeded: @@ -128,10 +123,8 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) return -EFBIG; add_error: - pagevec_lru_add_file(&lru_pvec); - page_cache_release(pages + loop); - for (loop++; loop < npages; loop++) - __free_page(pages + loop); + while (loop < npages) + __free_page(pages + loop++); return ret; } diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index b7e6ac706b87..a404fb88e456 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -33,12 +33,15 @@ #include <linux/backing-dev.h> #include <linux/ramfs.h> #include <linux/sched.h> +#include <linux/parser.h> #include <asm/uaccess.h> #include "internal.h" /* some random number */ #define RAMFS_MAGIC 0x858458f6 +#define RAMFS_DEFAULT_MODE 0755 + static const struct super_operations ramfs_ops; static const struct inode_operations ramfs_dir_inode_operations; @@ -158,12 +161,75 @@ static const struct inode_operations ramfs_dir_inode_operations = { static const struct super_operations ramfs_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, + .show_options = generic_show_options, +}; + +struct ramfs_mount_opts { + umode_t mode; +}; + +enum { + Opt_mode, + Opt_err +}; + +static const match_table_t tokens = { + {Opt_mode, "mode=%o"}, + {Opt_err, NULL} +}; + +struct ramfs_fs_info { + struct ramfs_mount_opts mount_opts; }; +static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts) +{ + substring_t args[MAX_OPT_ARGS]; + int option; + int token; + char *p; + + opts->mode = RAMFS_DEFAULT_MODE; + + while ((p = strsep(&data, ",")) != NULL) { + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_mode: + if (match_octal(&args[0], &option)) + return -EINVAL; + opts->mode = option & S_IALLUGO; + break; + default: + printk(KERN_ERR "ramfs: bad mount option: %s\n", p); + return -EINVAL; + } + } + + return 0; +} + static int ramfs_fill_super(struct super_block * sb, void * data, int silent) { - struct inode * inode; - struct dentry * root; + struct ramfs_fs_info *fsi; + struct inode *inode = NULL; + struct dentry *root; + int err; + + save_mount_options(sb, data); + + fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL); + if (!fsi) { + err = -ENOMEM; + goto fail; + } + sb->s_fs_info = fsi; + + err = ramfs_parse_options(data, &fsi->mount_opts); + if (err) + goto fail; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; @@ -171,17 +237,23 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent) sb->s_magic = RAMFS_MAGIC; sb->s_op = &ramfs_ops; sb->s_time_gran = 1; - inode = ramfs_get_inode(sb, S_IFDIR | 0755, 0); - if (!inode) - return -ENOMEM; + inode = ramfs_get_inode(sb, S_IFDIR | fsi->mount_opts.mode, 0); + if (!inode) { + err = -ENOMEM; + goto fail; + } root = d_alloc_root(inode); if (!root) { - iput(inode); - return -ENOMEM; + err = -ENOMEM; + goto fail; } sb->s_root = root; return 0; +fail: + kfree(fsi); + iput(inode); + return err; } int ramfs_get_sb(struct file_system_type *fs_type, @@ -197,10 +269,16 @@ static int rootfs_get_sb(struct file_system_type *fs_type, mnt); } +static void ramfs_kill_sb(struct super_block *sb) +{ + kfree(sb->s_fs_info); + kill_litter_super(sb); +} + static struct file_system_type ramfs_fs_type = { .name = "ramfs", .get_sb = ramfs_get_sb, - .kill_sb = kill_litter_super, + .kill_sb = ramfs_kill_sb, }; static struct file_system_type rootfs_fs_type = { .name = "rootfs", diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 07703d3ff4a1..93e0c0281d45 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -234,7 +234,7 @@ static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return ret; } -static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct file *file = vma->vm_file; struct bin_buffer *bb = file->private_data; @@ -242,15 +242,15 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page) int ret; if (!bb->vm_ops) - return -EINVAL; + return VM_FAULT_SIGBUS; if (!bb->vm_ops->page_mkwrite) return 0; if (!sysfs_get_active_two(attr_sd)) - return -EINVAL; + return VM_FAULT_SIGBUS; - ret = bb->vm_ops->page_mkwrite(vma, page); + ret = bb->vm_ops->page_mkwrite(vma, vmf); sysfs_put_active_two(attr_sd); return ret; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 93b6de51f261..0ff89fe71e51 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1434,8 +1434,9 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) * mmap()d file has taken write protection fault and is being made * writable. UBIFS must ensure page is budgeted for. */ -static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct ubifs_info *c = inode->i_sb->s_fs_info; struct timespec now = ubifs_current_time(inode); @@ -1447,7 +1448,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); if (unlikely(c->ro_media)) - return -EROFS; + return VM_FAULT_SIGBUS; /* -EROFS */ /* * We have not locked @page so far so we may budget for changing the @@ -1480,7 +1481,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) if (err == -ENOSPC) ubifs_warn("out of space for mmapped file " "(inode number %lu)", inode->i_ino); - return err; + return VM_FAULT_SIGBUS; } lock_page(page); @@ -1520,6 +1521,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) out_unlock: unlock_page(page); ubifs_release_budget(c, &req); + if (err) + err = VM_FAULT_SIGBUS; return err; } diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index e14c4e3aea0c..f4e255441574 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -234,9 +234,9 @@ xfs_file_mmap( STATIC int xfs_vm_page_mkwrite( struct vm_area_struct *vma, - struct page *page) + struct vm_fault *vmf) { - return block_page_mkwrite(vma, page, xfs_get_blocks); + return block_page_mkwrite(vma, vmf, xfs_get_blocks); } const struct file_operations xfs_file_operations = { diff --git a/include/asm-frv/highmem.h b/include/asm-frv/highmem.h index 26cefcde5cee..68e4677fb9e7 100644 --- a/include/asm-frv/highmem.h +++ b/include/asm-frv/highmem.h @@ -18,6 +18,7 @@ #ifdef __KERNEL__ #include <linux/init.h> +#include <linux/highmem.h> #include <asm/mem-layout.h> #include <asm/spr-regs.h> #include <asm/mb-regs.h> @@ -116,6 +117,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type) unsigned long paddr; pagefault_disable(); + debug_kmap_atomic(type); paddr = page_to_phys(page); switch (type) { diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h deleted file mode 100644 index 189486c3f92e..000000000000 --- a/include/asm-generic/dma-mapping.h +++ /dev/null @@ -1,308 +0,0 @@ -/* Copyright (C) 2002 by James.Bottomley@HansenPartnership.com - * - * Implements the generic device dma API via the existing pci_ one - * for unconverted architectures - */ - -#ifndef _ASM_GENERIC_DMA_MAPPING_H -#define _ASM_GENERIC_DMA_MAPPING_H - - -#ifdef CONFIG_PCI - -/* we implement the API below in terms of the existing PCI one, - * so include it */ -#include <linux/pci.h> -/* need struct page definitions */ -#include <linux/mm.h> - -static inline int -dma_supported(struct device *dev, u64 mask) -{ - BUG_ON(dev->bus != &pci_bus_type); - - return pci_dma_supported(to_pci_dev(dev), mask); -} - -static inline int -dma_set_mask(struct device *dev, u64 dma_mask) -{ - BUG_ON(dev->bus != &pci_bus_type); - - return pci_set_dma_mask(to_pci_dev(dev), dma_mask); -} - -static inline void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t flag) -{ - BUG_ON(dev->bus != &pci_bus_type); - - return pci_alloc_consistent(to_pci_dev(dev), size, dma_handle); -} - -static inline void -dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_handle) -{ - BUG_ON(dev->bus != &pci_bus_type); - - pci_free_consistent(to_pci_dev(dev), size, cpu_addr, dma_handle); -} - -static inline dma_addr_t -dma_map_single(struct device *dev, void *cpu_addr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(dev->bus != &pci_bus_type); - - return pci_map_single(to_pci_dev(dev), cpu_addr, size, (int)direction); -} - -static inline void -dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(dev->bus != &pci_bus_type); - - pci_unmap_single(to_pci_dev(dev), dma_addr, size, (int)direction); -} - -static inline dma_addr_t -dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(dev->bus != &pci_bus_type); - - return pci_map_page(to_pci_dev(dev), page, offset, size, (int)direction); -} - -static inline void -dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(dev->bus != &pci_bus_type); - - pci_unmap_page(to_pci_dev(dev), dma_address, size, (int)direction); -} - -static inline int -dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction direction) -{ - BUG_ON(dev->bus != &pci_bus_type); - - return pci_map_sg(to_pci_dev(dev), sg, nents, (int)direction); -} - -static inline void -dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, - enum dma_data_direction direction) -{ - BUG_ON(dev->bus != &pci_bus_type); - - pci_unmap_sg(to_pci_dev(dev), sg, nhwentries, (int)direction); -} - -static inline void -dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(dev->bus != &pci_bus_type); - - pci_dma_sync_single_for_cpu(to_pci_dev(dev), dma_handle, - size, (int)direction); -} - -static inline void -dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(dev->bus != &pci_bus_type); - - pci_dma_sync_single_for_device(to_pci_dev(dev), dma_handle, - size, (int)direction); -} - -static inline void -dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - BUG_ON(dev->bus != &pci_bus_type); - - pci_dma_sync_sg_for_cpu(to_pci_dev(dev), sg, nelems, (int)direction); -} - -static inline void -dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - BUG_ON(dev->bus != &pci_bus_type); - - pci_dma_sync_sg_for_device(to_pci_dev(dev), sg, nelems, (int)direction); -} - -static inline int -dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return pci_dma_mapping_error(to_pci_dev(dev), dma_addr); -} - - -#else - -static inline int -dma_supported(struct device *dev, u64 mask) -{ - return 0; -} - -static inline int -dma_set_mask(struct device *dev, u64 dma_mask) -{ - BUG(); - return 0; -} - -static inline void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t flag) -{ - BUG(); - return NULL; -} - -static inline void -dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_handle) -{ - BUG(); -} - -static inline dma_addr_t -dma_map_single(struct device *dev, void *cpu_addr, size_t size, - enum dma_data_direction direction) -{ - BUG(); - return 0; -} - -static inline void -dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction) -{ - BUG(); -} - -static inline dma_addr_t -dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - BUG(); - return 0; -} - -static inline void -dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, - enum dma_data_direction direction) -{ - BUG(); -} - -static inline int -dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction direction) -{ - BUG(); - return 0; -} - -static inline void -dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, - enum dma_data_direction direction) -{ - BUG(); -} - -static inline void -dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - BUG(); -} - -static inline void -dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - BUG(); -} - -static inline void -dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - BUG(); -} - -static inline void -dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - BUG(); -} - -static inline int -dma_error(dma_addr_t dma_addr) -{ - return 0; -} - -#endif - -/* Now for the API extensions over the pci_ one */ - -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -#define dma_is_consistent(d, h) (1) - -static inline int -dma_get_cache_alignment(void) -{ - /* no easy way to get cache size on all processors, so return - * the maximum possible, to be safe */ - return (1 << INTERNODE_CACHE_SHIFT); -} - -static inline void -dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - /* just sync everything, that's all the pci API can do */ - dma_sync_single_for_cpu(dev, dma_handle, offset+size, direction); -} - -static inline void -dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - /* just sync everything, that's all the pci API can do */ - dma_sync_single_for_device(dev, dma_handle, offset+size, direction); -} - -static inline void -dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction) -{ - /* could define this in terms of the dma_cache ... operations, - * but if you get this on a platform, you should convert the platform - * to using the generic device DMA API */ - BUG(); -} - -#endif - diff --git a/include/asm-mn10300/highmem.h b/include/asm-mn10300/highmem.h index 5256854c0453..90f2abb04bfd 100644 --- a/include/asm-mn10300/highmem.h +++ b/include/asm-mn10300/highmem.h @@ -16,6 +16,7 @@ #include <linux/init.h> #include <linux/interrupt.h> +#include <linux/highmem.h> #include <asm/kmap_types.h> #include <asm/pgtable.h> @@ -77,6 +78,7 @@ static inline unsigned long kmap_atomic(struct page *page, enum km_type type) if (page < highmem_start_page) return page_address(page); + debug_kmap_atomic(type); idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); #if HIGHMEM_DEBUG diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 78199151c00b..d047f846c3ed 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -257,6 +257,40 @@ void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); void __init acpi_s4_no_nvs(void); #endif /* CONFIG_PM_SLEEP */ + +#define OSC_QUERY_TYPE 0 +#define OSC_SUPPORT_TYPE 1 +#define OSC_CONTROL_TYPE 2 +#define OSC_SUPPORT_MASKS 0x1f + +/* _OSC DW0 Definition */ +#define OSC_QUERY_ENABLE 1 +#define OSC_REQUEST_ERROR 2 +#define OSC_INVALID_UUID_ERROR 4 +#define OSC_INVALID_REVISION_ERROR 8 +#define OSC_CAPABILITIES_MASK_ERROR 16 + +/* _OSC DW1 Definition (OS Support Fields) */ +#define OSC_EXT_PCI_CONFIG_SUPPORT 1 +#define OSC_ACTIVE_STATE_PWR_SUPPORT 2 +#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT 4 +#define OSC_PCI_SEGMENT_GROUPS_SUPPORT 8 +#define OSC_MSI_SUPPORT 16 + +/* _OSC DW1 Definition (OS Control Fields) */ +#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 1 +#define OSC_SHPC_NATIVE_HP_CONTROL 2 +#define OSC_PCI_EXPRESS_PME_CONTROL 4 +#define OSC_PCI_EXPRESS_AER_CONTROL 8 +#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL 16 + +#define OSC_CONTROL_MASKS (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \ + OSC_SHPC_NATIVE_HP_CONTROL | \ + OSC_PCI_EXPRESS_PME_CONTROL | \ + OSC_PCI_EXPRESS_AER_CONTROL | \ + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) + +extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags); #else /* CONFIG_ACPI */ static inline int early_acpi_boot_init(void) diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h index 91a773993a5c..850f39b33e74 100644 --- a/include/linux/auto_dev-ioctl.h +++ b/include/linux/auto_dev-ioctl.h @@ -10,8 +10,13 @@ #ifndef _LINUX_AUTO_DEV_IOCTL_H #define _LINUX_AUTO_DEV_IOCTL_H +#include <linux/auto_fs.h> + +#ifdef __KERNEL__ #include <linux/string.h> -#include <linux/types.h> +#else +#include <string.h> +#endif /* __KERNEL__ */ #define AUTOFS_DEVICE_NAME "autofs" diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h index c21e5972a3e8..63265852b7d1 100644 --- a/include/linux/auto_fs.h +++ b/include/linux/auto_fs.h @@ -17,11 +17,13 @@ #ifdef __KERNEL__ #include <linux/fs.h> #include <linux/limits.h> +#include <linux/types.h> +#include <linux/ioctl.h> +#else #include <asm/types.h> +#include <sys/ioctl.h> #endif /* __KERNEL__ */ -#include <linux/ioctl.h> - /* This file describes autofs v3 */ #define AUTOFS_PROTO_VERSION 3 diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 455d83219fae..bc3ab7073695 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -146,10 +146,10 @@ extern void *alloc_large_system_hash(const char *tablename, #define HASH_EARLY 0x00000001 /* Allocating during early boot? */ -/* Only NUMA needs hash distribution. - * IA64 and x86_64 have sufficient vmalloc space. +/* Only NUMA needs hash distribution. 64bit NUMA architectures have + * sufficient vmalloc space. */ -#if defined(CONFIG_NUMA) && (defined(CONFIG_IA64) || defined(CONFIG_X86_64)) +#if defined(CONFIG_NUMA) && defined(CONFIG_64BIT) #define HASHDIST_DEFAULT 1 #else #define HASHDIST_DEFAULT 0 diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f19fd9045ea0..3d7bcde2e332 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -216,7 +216,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); -int block_page_mkwrite(struct vm_area_struct *vma, struct page *page, +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index a667637b54e3..f45a8ae5f828 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -13,10 +13,20 @@ /* For O_CLOEXEC and O_NONBLOCK */ #include <linux/fcntl.h> -/* Flags for eventfd2. */ +/* + * CAREFUL: Check include/asm-generic/fcntl.h when defining + * new flags, since they might collide with O_* ones. We want + * to re-use O_* flags that couldn't possibly have a meaning + * from eventfd, in order to leave a free define-space for + * shared O_* flags. + */ +#define EFD_SEMAPHORE (1 << 0) #define EFD_CLOEXEC O_CLOEXEC #define EFD_NONBLOCK O_NONBLOCK +#define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) +#define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE) + struct file *eventfd_fget(int fd); int eventfd_signal(struct file *file, int n); diff --git a/include/linux/fb.h b/include/linux/fb.h index 31527e17076b..f563c5013932 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -123,6 +123,7 @@ struct dentry; #define FB_ACCEL_TRIDENT_3DIMAGE 51 /* Trident 3DImage */ #define FB_ACCEL_TRIDENT_BLADE3D 52 /* Trident Blade3D */ #define FB_ACCEL_TRIDENT_BLADEXP 53 /* Trident BladeXP */ +#define FB_ACCEL_CIRRUS_ALPINE 53 /* Cirrus Logic 543x/544x/5480 */ #define FB_ACCEL_NEOMAGIC_NM2070 90 /* NeoMagic NM2070 */ #define FB_ACCEL_NEOMAGIC_NM2090 91 /* NeoMagic NM2090 */ #define FB_ACCEL_NEOMAGIC_NM2093 92 /* NeoMagic NM2093 */ @@ -960,15 +961,7 @@ extern struct fb_info *registered_fb[FB_MAX]; extern int num_registered_fb; extern struct class *fb_class; -static inline int lock_fb_info(struct fb_info *info) -{ - mutex_lock(&info->lock); - if (!info->fbops) { - mutex_unlock(&info->lock); - return 0; - } - return 1; -} +extern int lock_fb_info(struct fb_info *info); static inline void unlock_fb_info(struct fb_info *info) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 87e7bfc5ebd7..61211ad823fe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1878,6 +1878,7 @@ extern struct block_device *open_by_devnum(dev_t, fmode_t); extern void invalidate_bdev(struct block_device *); extern int sync_blockdev(struct block_device *bdev); extern struct super_block *freeze_bdev(struct block_device *); +extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); extern int fsync_super(struct super_block *); diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index d9051d717d27..7ef1caf50269 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -95,14 +95,15 @@ struct fsl_usb2_platform_data { #define FSL_USB2_PORT0_ENABLED 0x00000001 #define FSL_USB2_PORT1_ENABLED 0x00000002 +struct spi_device; + struct fsl_spi_platform_data { u32 initial_spmode; /* initial SPMODE value */ - u16 bus_num; + s16 bus_num; bool qe_mode; /* board specific information */ u16 max_chipselect; - void (*activate_cs)(u8 cs, u8 polarity); - void (*deactivate_cs)(u8 cs, u8 polarity); + void (*cs_control)(struct spi_device *spi, bool on); u32 sysclk; }; diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 13875ce9112a..7ff5c55f9b55 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -187,4 +187,16 @@ static inline void copy_highpage(struct page *to, struct page *from) kunmap_atomic(vto, KM_USER1); } +#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT) + +void debug_kmap_atomic(enum km_type type); + +#else + +static inline void debug_kmap_atomic(enum km_type type) +{ +} + +#endif + #endif /* _LINUX_HIGHMEM_H */ diff --git a/include/linux/ide.h b/include/linux/ide.h index d5d832271f44..a5d26f66ef78 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -265,7 +265,7 @@ enum { IDE_TFLAG_WRITE = (1 << 12), IDE_TFLAG_CUSTOM_HANDLER = (1 << 13), IDE_TFLAG_DMA_PIO_FALLBACK = (1 << 14), - IDE_TFLAG_IN_HOB_FEATURE = (1 << 15), + IDE_TFLAG_IN_HOB_ERROR = (1 << 15), IDE_TFLAG_IN_HOB_NSECT = (1 << 16), IDE_TFLAG_IN_HOB_LBAL = (1 << 17), IDE_TFLAG_IN_HOB_LBAM = (1 << 18), @@ -273,10 +273,10 @@ enum { IDE_TFLAG_IN_HOB_LBA = IDE_TFLAG_IN_HOB_LBAL | IDE_TFLAG_IN_HOB_LBAM | IDE_TFLAG_IN_HOB_LBAH, - IDE_TFLAG_IN_HOB = IDE_TFLAG_IN_HOB_FEATURE | + IDE_TFLAG_IN_HOB = IDE_TFLAG_IN_HOB_ERROR | IDE_TFLAG_IN_HOB_NSECT | IDE_TFLAG_IN_HOB_LBA, - IDE_TFLAG_IN_FEATURE = (1 << 20), + IDE_TFLAG_IN_ERROR = (1 << 20), IDE_TFLAG_IN_NSECT = (1 << 21), IDE_TFLAG_IN_LBAL = (1 << 22), IDE_TFLAG_IN_LBAM = (1 << 23), @@ -310,8 +310,12 @@ enum { struct ide_taskfile { u8 hob_data; /* 0: high data byte (for TASKFILE IOCTL) */ + /* 1-5: additional data to support LBA48 */ + union { + u8 hob_error; /* read: error */ + u8 hob_feature; /* write: feature */ + }; - u8 hob_feature; /* 1-5: additional data to support LBA48 */ u8 hob_nsect; u8 hob_lbal; u8 hob_lbam; @@ -352,6 +356,8 @@ struct ide_cmd { unsigned int nbytes; unsigned int nleft; + unsigned int last_xfer_len; + struct scatterlist *cursg; unsigned int cursg_ofs; @@ -375,7 +381,7 @@ enum { * With each packet command, we allocate a buffer of IDE_PC_BUFFER_SIZE bytes. * This is used for several packet commands (not for READ/WRITE commands). */ -#define IDE_PC_BUFFER_SIZE 256 +#define IDE_PC_BUFFER_SIZE 64 #define ATAPI_WAIT_PC (60 * HZ) struct ide_atapi_pc { @@ -413,9 +419,6 @@ struct ide_atapi_pc { struct idetape_bh *bh; char *b_data; - struct scatterlist *sg; - unsigned int sg_cnt; - unsigned long timeout; }; @@ -456,11 +459,6 @@ enum { IDE_AFLAG_TOCADDR_AS_BCD = (1 << 3), /* TOC track numbers are in BCD. */ IDE_AFLAG_TOCTRACKS_AS_BCD = (1 << 4), - /* - * Drive does not provide data in multiples of SECTOR_SIZE - * when more than one interrupt is needed. - */ - IDE_AFLAG_LIMIT_NFRAMES = (1 << 5), /* Saved TOC information is current. */ IDE_AFLAG_TOC_VALID = (1 << 6), /* We think that the drive door is locked. */ @@ -605,7 +603,7 @@ struct ide_drive_s { unsigned int bios_cyl; /* BIOS/fdisk/LILO number of cyls */ unsigned int cyl; /* "real" number of cyls */ - unsigned int drive_data; /* used by set_pio_mode/selectproc */ + unsigned int drive_data; /* used by set_pio_mode/dev_select() */ unsigned int failures; /* current failure count */ unsigned int max_failures; /* maximum allowed failure count */ u64 probed_capacity;/* initial reported media capacity (ide-cd only currently) */ @@ -661,9 +659,9 @@ struct ide_tp_ops { void (*exec_command)(struct hwif_s *, u8); u8 (*read_status)(struct hwif_s *); u8 (*read_altstatus)(struct hwif_s *); + void (*write_devctl)(struct hwif_s *, u8); - void (*set_irq)(struct hwif_s *, int); - + void (*dev_select)(ide_drive_t *); void (*tf_load)(ide_drive_t *, struct ide_cmd *); void (*tf_read)(ide_drive_t *, struct ide_cmd *); @@ -681,7 +679,6 @@ extern const struct ide_tp_ops default_tp_ops; * @init_dev: host specific initialization of a device * @set_pio_mode: routine to program host for PIO mode * @set_dma_mode: routine to program host for DMA mode - * @selectproc: tweaks hardware to select drive * @reset_poll: chipset polling based on hba specifics * @pre_reset: chipset specific changes to default for device-hba resets * @resetproc: routine to reset controller after a disk reset @@ -698,7 +695,6 @@ struct ide_port_ops { void (*init_dev)(ide_drive_t *); void (*set_pio_mode)(ide_drive_t *, const u8); void (*set_dma_mode)(ide_drive_t *, const u8); - void (*selectproc)(ide_drive_t *); int (*reset_poll)(ide_drive_t *); void (*pre_reset)(ide_drive_t *); void (*resetproc)(ide_drive_t *); @@ -719,8 +715,10 @@ struct ide_dma_ops { int (*dma_end)(struct ide_drive_s *); int (*dma_test_irq)(struct ide_drive_s *); void (*dma_lost_irq)(struct ide_drive_s *); + /* below ones are optional */ + int (*dma_check)(struct ide_drive_s *, struct ide_cmd *); int (*dma_timer_expiry)(struct ide_drive_s *); - void (*dma_timeout)(struct ide_drive_s *); + void (*dma_clear)(struct ide_drive_s *); /* * The following method is optional and only required to be * implemented for the SFF-8038i compatible controllers. @@ -1169,18 +1167,15 @@ void ide_tf_dump(const char *, struct ide_taskfile *); void ide_exec_command(ide_hwif_t *, u8); u8 ide_read_status(ide_hwif_t *); u8 ide_read_altstatus(ide_hwif_t *); +void ide_write_devctl(ide_hwif_t *, u8); -void ide_set_irq(ide_hwif_t *, int); - +void ide_dev_select(ide_drive_t *); void ide_tf_load(ide_drive_t *, struct ide_cmd *); void ide_tf_read(ide_drive_t *, struct ide_cmd *); void ide_input_data(ide_drive_t *, struct ide_cmd *, void *, unsigned int); void ide_output_data(ide_drive_t *, struct ide_cmd *, void *, unsigned int); -int ide_io_buffers(ide_drive_t *, struct ide_atapi_pc *, unsigned int, int); - -extern void SELECT_DRIVE(ide_drive_t *); void SELECT_MASK(ide_drive_t *, int); u8 ide_read_error(ide_drive_t *); @@ -1226,6 +1221,8 @@ ide_startstop_t ide_issue_pc(ide_drive_t *, struct ide_cmd *); ide_startstop_t do_rw_taskfile(ide_drive_t *, struct ide_cmd *); +void ide_pio_bytes(ide_drive_t *, struct ide_cmd *, unsigned int, unsigned int); + void ide_finish_cmd(ide_drive_t *, struct ide_cmd *, u8); int ide_raw_taskfile(ide_drive_t *, struct ide_cmd *, u8 *, u16); @@ -1443,8 +1440,8 @@ ide_startstop_t ide_dma_intr(ide_drive_t *); int ide_allocate_dma_engine(ide_hwif_t *); void ide_release_dma_engine(ide_hwif_t *); -int ide_build_sglist(ide_drive_t *, struct ide_cmd *); -void ide_destroy_dmatable(ide_drive_t *); +int ide_dma_prepare(ide_drive_t *, struct ide_cmd *); +void ide_dma_unmap_sg(ide_drive_t *, struct ide_cmd *); #ifdef CONFIG_BLK_DEV_IDEDMA_SFF int config_drive_for_dma(ide_drive_t *); @@ -1462,7 +1459,6 @@ static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; } #endif /* CONFIG_BLK_DEV_IDEDMA_SFF */ void ide_dma_lost_irq(ide_drive_t *); -void ide_dma_timeout(ide_drive_t *); ide_startstop_t ide_dma_timeout_retry(ide_drive_t *, int); #else @@ -1478,8 +1474,10 @@ static inline void ide_check_dma_crc(ide_drive_t *drive) { ; } static inline ide_startstop_t ide_dma_intr(ide_drive_t *drive) { return ide_stopped; } static inline ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) { return ide_stopped; } static inline void ide_release_dma_engine(ide_hwif_t *hwif) { ; } -static inline int ide_build_sglist(ide_drive_t *drive, - struct ide_cmd *cmd) { return 0; } +static inline int ide_dma_prepare(ide_drive_t *drive, + struct ide_cmd *cmd) { return 1; } +static inline void ide_dma_unmap_sg(ide_drive_t *drive, + struct ide_cmd *cmd) { ; } #endif /* CONFIG_BLK_DEV_IDEDMA */ #ifdef CONFIG_BLK_DEV_IDEACPI diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 4d248b3f1323..8815a3456b3b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -649,6 +649,12 @@ struct transaction_s int t_handle_count; /* + * This transaction is being forced and some process is + * waiting for it to finish. + */ + int t_synchronous_commit:1; + + /* * For use by the filesystem to store fs-specific data * structures associated with the transaction */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f81d80f47dcb..e720b0da7751 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -353,6 +353,8 @@ static inline char *pack_hex_byte(char *buf, u8 byte) printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info(fmt, ...) \ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +#define pr_cont(fmt, ...) \ + printk(KERN_CONT fmt, ##__VA_ARGS__) /* If you are writing a driver, please use dev_dbg instead */ #if defined(DEBUG) diff --git a/include/linux/loop.h b/include/linux/loop.h index 6ffd6db5bb0d..40725447f5e0 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -160,5 +160,6 @@ int loop_unregister_transfer(int number); #define LOOP_SET_STATUS64 0x4C04 #define LOOP_GET_STATUS64 0x4C05 #define LOOP_CHANGE_FD 0x4C06 +#define LOOP_SET_CAPACITY 0x4C07 #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index b1ea37fc7a24..aeabe953ba4f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -135,6 +135,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ +#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is @@ -187,7 +188,7 @@ struct vm_operations_struct { /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ - int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page); + int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf); /* called by access_process_vm when get_user_pages() fails, typically * for use by special VMAs that can switch between memory and hardware @@ -834,6 +835,7 @@ int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); +void account_page_dirtied(struct page *page, struct address_space *mapping); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d84feb7bdbf0..ddadb4defe00 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -11,6 +11,7 @@ #include <linux/rwsem.h> #include <linux/completion.h> #include <linux/cpumask.h> +#include <linux/page-debug-flags.h> #include <asm/page.h> #include <asm/mmu.h> @@ -174,6 +175,9 @@ struct vm_area_struct { #ifdef CONFIG_NUMA struct mempolicy *vm_policy; /* NUMA policy for the VMA */ #endif +#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS + unsigned long debug_flags; /* Use atomic bitops on this */ +#endif }; struct core_thread { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1aca6cebbb78..26ef24076b76 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -806,6 +806,14 @@ extern struct zone *next_zone(struct zone *zone); zone; \ zone = next_zone(zone)) +#define for_each_populated_zone(zone) \ + for (zone = (first_online_pgdat())->node_zones; \ + zone; \ + zone = next_zone(zone)) \ + if (!populated_zone(zone)) \ + ; /* do nothing */ \ + else + static inline struct zone *zonelist_zone(struct zoneref *zoneref) { return zoneref->zone; diff --git a/include/linux/msi.h b/include/linux/msi.h index d2b8a1e8ca11..6991ab5b24d1 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -20,20 +20,23 @@ extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); struct msi_desc { struct { - __u8 type : 5; /* {0: unused, 5h:MSI, 11h:MSI-X} */ + __u8 is_msix : 1; + __u8 multiple: 3; /* log2 number of messages */ __u8 maskbit : 1; /* mask-pending bit supported ? */ - __u8 masked : 1; __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ __u8 pos; /* Location of the msi capability */ - __u32 maskbits_mask; /* mask bits mask */ __u16 entry_nr; /* specific enabled entry */ unsigned default_irq; /* default pre-assigned irq */ - }msi_attrib; + } msi_attrib; + u32 masked; /* mask bits */ unsigned int irq; struct list_head list; - void __iomem *mask_base; + union { + void __iomem *mask_base; + u8 mask_pos; + }; struct pci_dev *dev; /* Last set MSI message */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8cc8807f77d6..bde2557c2a9c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -166,8 +166,7 @@ struct nfs_inode { */ struct radix_tree_root nfs_page_tree; - unsigned long ncommit, - npages; + unsigned long npages; /* Open contexts for shared mmap writes */ struct list_head open_files; @@ -207,6 +206,7 @@ struct nfs_inode { #define NFS_INO_STALE (1) /* possible stale inode */ #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ #define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */ +#define NFS_INO_FLUSHING (4) /* inode is flushing out data */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 9bb81aec91cf..29b1e40dce99 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -106,6 +106,11 @@ struct nfs_server { u32 attr_bitmask[2];/* V4 bitmask representing the set of attributes supported on this filesystem */ + u32 cache_consistency_bitmask[2]; + /* V4 bitmask representing the subset + of change attribute, size, ctime + and mtime attributes supported by + the server */ u32 acl_bitmask; /* V4 bitmask representing the ACEs that are supported on this filesystem */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 43a713fce11c..b89c34e40bc2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -27,12 +27,8 @@ static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid } struct nfs_fattr { - unsigned short valid; /* which fields are valid */ - __u64 pre_size; /* pre_op_attr.size */ - struct timespec pre_mtime; /* pre_op_attr.mtime */ - struct timespec pre_ctime; /* pre_op_attr.ctime */ - enum nfs_ftype type; /* always use NFSv2 types */ - __u32 mode; + unsigned int valid; /* which fields are valid */ + umode_t mode; __u32 nlink; __u32 uid; __u32 gid; @@ -52,19 +48,55 @@ struct nfs_fattr { struct timespec atime; struct timespec mtime; struct timespec ctime; - __u32 bitmap[2]; /* NFSv4 returned attribute bitmap */ __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ + __u64 pre_size; /* pre_op_attr.size */ + struct timespec pre_mtime; /* pre_op_attr.mtime */ + struct timespec pre_ctime; /* pre_op_attr.ctime */ unsigned long time_start; unsigned long gencount; }; -#define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ -#define NFS_ATTR_FATTR 0x0002 /* post-op attributes */ -#define NFS_ATTR_FATTR_V3 0x0004 /* NFSv3 attributes */ -#define NFS_ATTR_FATTR_V4 0x0008 /* NFSv4 change attribute */ -#define NFS_ATTR_WCC_V4 0x0010 /* pre-op change attribute */ -#define NFS_ATTR_FATTR_V4_REFERRAL 0x0020 /* NFSv4 referral */ +#define NFS_ATTR_FATTR_TYPE (1U << 0) +#define NFS_ATTR_FATTR_MODE (1U << 1) +#define NFS_ATTR_FATTR_NLINK (1U << 2) +#define NFS_ATTR_FATTR_OWNER (1U << 3) +#define NFS_ATTR_FATTR_GROUP (1U << 4) +#define NFS_ATTR_FATTR_RDEV (1U << 5) +#define NFS_ATTR_FATTR_SIZE (1U << 6) +#define NFS_ATTR_FATTR_PRESIZE (1U << 7) +#define NFS_ATTR_FATTR_BLOCKS_USED (1U << 8) +#define NFS_ATTR_FATTR_SPACE_USED (1U << 9) +#define NFS_ATTR_FATTR_FSID (1U << 10) +#define NFS_ATTR_FATTR_FILEID (1U << 11) +#define NFS_ATTR_FATTR_ATIME (1U << 12) +#define NFS_ATTR_FATTR_MTIME (1U << 13) +#define NFS_ATTR_FATTR_CTIME (1U << 14) +#define NFS_ATTR_FATTR_PREMTIME (1U << 15) +#define NFS_ATTR_FATTR_PRECTIME (1U << 16) +#define NFS_ATTR_FATTR_CHANGE (1U << 17) +#define NFS_ATTR_FATTR_PRECHANGE (1U << 18) +#define NFS_ATTR_FATTR_V4_REFERRAL (1U << 19) /* NFSv4 referral */ + +#define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ + | NFS_ATTR_FATTR_MODE \ + | NFS_ATTR_FATTR_NLINK \ + | NFS_ATTR_FATTR_OWNER \ + | NFS_ATTR_FATTR_GROUP \ + | NFS_ATTR_FATTR_RDEV \ + | NFS_ATTR_FATTR_SIZE \ + | NFS_ATTR_FATTR_FSID \ + | NFS_ATTR_FATTR_FILEID \ + | NFS_ATTR_FATTR_ATIME \ + | NFS_ATTR_FATTR_MTIME \ + | NFS_ATTR_FATTR_CTIME) +#define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \ + | NFS_ATTR_FATTR_BLOCKS_USED) +#define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \ + | NFS_ATTR_FATTR_SPACE_USED) +#define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \ + | NFS_ATTR_FATTR_SPACE_USED \ + | NFS_ATTR_FATTR_CHANGE) /* * Info on the file system @@ -836,6 +868,7 @@ struct nfs_rpc_ops { int (*lock)(struct file *, int, struct file_lock *); int (*lock_check_bounds)(const struct file_lock *); void (*clear_acl_cache)(struct inode *); + void (*close_context)(struct nfs_open_context *ctx, int); }; /* diff --git a/include/linux/page-debug-flags.h b/include/linux/page-debug-flags.h new file mode 100644 index 000000000000..b0638fd91e92 --- /dev/null +++ b/include/linux/page-debug-flags.h @@ -0,0 +1,30 @@ +#ifndef LINUX_PAGE_DEBUG_FLAGS_H +#define LINUX_PAGE_DEBUG_FLAGS_H + +/* + * page->debug_flags bits: + * + * PAGE_DEBUG_FLAG_POISON is set for poisoned pages. This is used to + * implement generic debug pagealloc feature. The pages are filled with + * poison patterns and set this flag after free_pages(). The poisoned + * pages are verified whether the patterns are not corrupted and clear + * the flag before alloc_pages(). + */ + +enum page_debug_flags { + PAGE_DEBUG_FLAG_POISON, /* Page is poisoned */ +}; + +/* + * Ensure that CONFIG_WANT_PAGE_DEBUG_FLAGS reliably + * gets turned off when no debug features are enabling it! + */ + +#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS +#if !defined(CONFIG_PAGE_POISONING) \ +/* && !defined(CONFIG_PAGE_DEBUG_SOMETHING_ELSE) && ... */ +#error WANT_PAGE_DEBUG_FLAGS is turned on with no debug features! +#endif +#endif /* CONFIG_WANT_PAGE_DEBUG_FLAGS */ + +#endif /* LINUX_PAGE_DEBUG_FLAGS_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 219a523ecdb0..61df1779b2a5 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -96,6 +96,8 @@ enum pageflags { PG_swapbacked, /* Page is backed by RAM/swap */ #ifdef CONFIG_UNEVICTABLE_LRU PG_unevictable, /* Page is "unevictable" */ +#endif +#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT PG_mlocked, /* Page is vma mlocked */ #endif #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR @@ -234,20 +236,20 @@ PAGEFLAG_FALSE(SwapCache) #ifdef CONFIG_UNEVICTABLE_LRU PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) TESTCLEARFLAG(Unevictable, unevictable) +#else +PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) + SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) + __CLEARPAGEFLAG_NOOP(Unevictable) +#endif +#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT #define MLOCK_PAGES 1 PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) TESTSCFLAG(Mlocked, mlocked) - #else - #define MLOCK_PAGES 0 PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) - -PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) - SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) - __CLEARPAGEFLAG_NOOP(Unevictable) #endif #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR @@ -367,9 +369,13 @@ static inline void __ClearPageTail(struct page *page) #ifdef CONFIG_UNEVICTABLE_LRU #define __PG_UNEVICTABLE (1 << PG_unevictable) -#define __PG_MLOCKED (1 << PG_mlocked) #else #define __PG_UNEVICTABLE 0 +#endif + +#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT +#define __PG_MLOCKED (1 << PG_mlocked) +#else #define __PG_MLOCKED 0 #endif diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 7b2886fa7fdc..bab82f4c571c 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -24,7 +24,6 @@ void __pagevec_release(struct pagevec *pvec); void __pagevec_free(struct pagevec *pvec); void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru); void pagevec_strip(struct pagevec *pvec); -void pagevec_swap_free(struct pagevec *pvec); unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, pgoff_t start, unsigned nr_pages); unsigned pagevec_lookup_tag(struct pagevec *pvec, diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 042c166f65d5..092e82e0048c 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -10,72 +10,25 @@ #include <linux/acpi.h> -#define OSC_QUERY_TYPE 0 -#define OSC_SUPPORT_TYPE 1 -#define OSC_CONTROL_TYPE 2 -#define OSC_SUPPORT_MASKS 0x1f - -/* - * _OSC DW0 Definition - */ -#define OSC_QUERY_ENABLE 1 -#define OSC_REQUEST_ERROR 2 -#define OSC_INVALID_UUID_ERROR 4 -#define OSC_INVALID_REVISION_ERROR 8 -#define OSC_CAPABILITIES_MASK_ERROR 16 - -/* - * _OSC DW1 Definition (OS Support Fields) - */ -#define OSC_EXT_PCI_CONFIG_SUPPORT 1 -#define OSC_ACTIVE_STATE_PWR_SUPPORT 2 -#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT 4 -#define OSC_PCI_SEGMENT_GROUPS_SUPPORT 8 -#define OSC_MSI_SUPPORT 16 - -/* - * _OSC DW1 Definition (OS Control Fields) - */ -#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 1 -#define OSC_SHPC_NATIVE_HP_CONTROL 2 -#define OSC_PCI_EXPRESS_PME_CONTROL 4 -#define OSC_PCI_EXPRESS_AER_CONTROL 8 -#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL 16 - -#define OSC_CONTROL_MASKS (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | \ - OSC_SHPC_NATIVE_HP_CONTROL | \ - OSC_PCI_EXPRESS_PME_CONTROL | \ - OSC_PCI_EXPRESS_AER_CONTROL | \ - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) - #ifdef CONFIG_ACPI -extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags); -int pci_acpi_osc_support(acpi_handle handle, u32 flags); static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { - /* Find root host bridge */ - while (pdev->bus->self) - pdev = pdev->bus->self; - - return acpi_get_pci_rootbridge_handle(pci_domain_nr(pdev->bus), - pdev->bus->number); + struct pci_bus *pbus = pdev->bus; + /* Find a PCI root bus */ + while (pbus->parent) + pbus = pbus->parent; + return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus), + pbus->number); } static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) { - int seg = pci_domain_nr(pbus), busnr = pbus->number; - struct pci_dev *bridge = pbus->self; - if (bridge) - return DEVICE_ACPI_HANDLE(&(bridge->dev)); - return acpi_get_pci_rootbridge_handle(seg, busnr); + if (pbus->parent) + return DEVICE_ACPI_HANDLE(&(pbus->self->dev)); + return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus), + pbus->number); } #else -#if !defined(AE_ERROR) -typedef u32 acpi_status; -#define AE_ERROR (acpi_status) (0x0001) -#endif -static inline acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) -{return AE_ERROR;} static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { return NULL; } #endif diff --git a/include/linux/pci.h b/include/linux/pci.h index df3644132617..a7fe4bbd7ff1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -52,6 +52,7 @@ #include <asm/atomic.h> #include <linux/device.h> #include <linux/io.h> +#include <linux/irqreturn.h> /* Include the ID list */ #include <linux/pci_ids.h> @@ -93,6 +94,12 @@ enum { /* #6: expansion ROM resource */ PCI_ROM_RESOURCE, + /* device specific resources */ +#ifdef CONFIG_PCI_IOV + PCI_IOV_RESOURCES, + PCI_IOV_RESOURCE_END = PCI_IOV_RESOURCES + PCI_SRIOV_NUM_BARS - 1, +#endif + /* resources assigned to buses behind the bridge */ #define PCI_BRIDGE_RESOURCE_NUM 4 @@ -180,6 +187,7 @@ struct pci_cap_saved_state { struct pcie_link_state; struct pci_vpd; +struct pci_sriov; /* * The pci_dev structure is used to describe PCI devices. @@ -257,6 +265,8 @@ struct pci_dev { unsigned int is_managed:1; unsigned int is_pcie:1; unsigned int state_saved:1; + unsigned int is_physfn:1; + unsigned int is_virtfn:1; pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ @@ -270,6 +280,12 @@ struct pci_dev { struct list_head msi_list; #endif struct pci_vpd *vpd; +#ifdef CONFIG_PCI_IOV + union { + struct pci_sriov *sriov; /* SR-IOV capability related */ + struct pci_dev *physfn; /* the PF this VF is associated with */ + }; +#endif }; extern struct pci_dev *alloc_pci_dev(void); @@ -341,6 +357,15 @@ struct pci_bus { #define pci_bus_b(n) list_entry(n, struct pci_bus, node) #define to_pci_bus(n) container_of(n, struct pci_bus, dev) +/* + * Returns true if the pci bus is root (behind host-pci bridge), + * false otherwise + */ +static inline bool pci_is_root_bus(struct pci_bus *pbus) +{ + return !(pbus->parent); +} + #ifdef CONFIG_PCI_MSI static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { @@ -528,7 +553,7 @@ void pcibios_update_irq(struct pci_dev *, int irq); /* Generic PCI functions used internally */ extern struct pci_bus *pci_find_bus(int domain, int busnr); -void pci_bus_add_devices(struct pci_bus *bus); +void pci_bus_add_devices(const struct pci_bus *bus); struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata); static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops, @@ -702,6 +727,9 @@ int pci_back_from_sleep(struct pci_dev *dev); /* Functions for PCI Hotplug drivers to use */ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); +#ifdef CONFIG_HOTPLUG +unsigned int pci_rescan_bus(struct pci_bus *bus); +#endif /* Vital product data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); @@ -709,7 +737,7 @@ ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void int pci_vpd_truncate(struct pci_dev *dev, size_t size); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ -void pci_bus_assign_resources(struct pci_bus *bus); +void pci_bus_assign_resources(const struct pci_bus *bus); void pci_bus_size_bridges(struct pci_bus *bus); int pci_claim_resource(struct pci_dev *, int); void pci_assign_unassigned_resources(void); @@ -790,7 +818,7 @@ struct msix_entry { #ifndef CONFIG_PCI_MSI -static inline int pci_enable_msi(struct pci_dev *dev) +static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) { return -1; } @@ -800,6 +828,10 @@ static inline void pci_msi_shutdown(struct pci_dev *dev) static inline void pci_disable_msi(struct pci_dev *dev) { } +static inline int pci_msix_table_size(struct pci_dev *dev) +{ + return 0; +} static inline int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) { @@ -821,9 +853,10 @@ static inline int pci_msi_enabled(void) return 0; } #else -extern int pci_enable_msi(struct pci_dev *dev); +extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec); extern void pci_msi_shutdown(struct pci_dev *dev); extern void pci_disable_msi(struct pci_dev *dev); +extern int pci_msix_table_size(struct pci_dev *dev); extern int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec); extern void pci_msix_shutdown(struct pci_dev *dev); @@ -842,6 +875,8 @@ static inline int pcie_aspm_enabled(void) extern int pcie_aspm_enabled(void); #endif +#define pci_enable_msi(pdev) pci_enable_msi_block(pdev, 1) + #ifdef CONFIG_HT_IRQ /* The functions a driver should call */ int ht_create_irq(struct pci_dev *dev, int idx); @@ -1195,5 +1230,23 @@ int pci_ext_cfg_avail(struct pci_dev *dev); void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar); +#ifdef CONFIG_PCI_IOV +extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); +extern void pci_disable_sriov(struct pci_dev *dev); +extern irqreturn_t pci_sriov_migration(struct pci_dev *dev); +#else +static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) +{ + return -ENODEV; +} +static inline void pci_disable_sriov(struct pci_dev *dev) +{ +} +static inline irqreturn_t pci_sriov_migration(struct pci_dev *dev) +{ + return IRQ_NONE; +} +#endif + #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e5816dd33371..cb14fd260837 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2396,6 +2396,7 @@ #define PCI_DEVICE_ID_INTEL_82801CA_12 0x248c #define PCI_DEVICE_ID_INTEL_82801DB_0 0x24c0 #define PCI_DEVICE_ID_INTEL_82801DB_1 0x24c1 +#define PCI_DEVICE_ID_INTEL_82801DB_2 0x24c2 #define PCI_DEVICE_ID_INTEL_82801DB_3 0x24c3 #define PCI_DEVICE_ID_INTEL_82801DB_5 0x24c5 #define PCI_DEVICE_ID_INTEL_82801DB_6 0x24c6 diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 027815b4635e..e4d08c1b2e0b 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -235,7 +235,7 @@ #define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */ #define PCI_PM_CTRL 4 /* PM control and status register */ #define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ -#define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */ +#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008 /* No reset for D3hot->D0 */ #define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ #define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ #define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */ @@ -375,6 +375,7 @@ #define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ #define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ #define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ +#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ #define PCI_EXP_DEVCAP 4 /* Device capabilities */ @@ -487,6 +488,8 @@ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ +#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ /* Extended Capabilities (PCI-X 2.0 and Express) */ #define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) @@ -498,6 +501,7 @@ #define PCI_EXT_CAP_ID_DSN 3 #define PCI_EXT_CAP_ID_PWR 4 #define PCI_EXT_CAP_ID_ARI 14 +#define PCI_EXT_CAP_ID_SRIOV 16 /* Advanced Error Reporting */ #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ @@ -615,4 +619,35 @@ #define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */ #define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */ +/* Single Root I/O Virtualization */ +#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ +#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ +#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */ +#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */ +#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */ +#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */ +#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */ +#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */ +#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */ +#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */ +#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */ +#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */ +#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */ +#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */ +#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */ +#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */ +#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */ +#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */ +#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */ +#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */ +#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */ +#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */ +#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/ +#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */ +#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */ +#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */ +#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ +#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ +#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ + #endif /* LINUX_PCI_REGS_H */ diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index 6cd91e3f9820..b4c79545330b 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -16,29 +16,30 @@ #define PCIE_ANY_PORT 7 /* Service Type */ -#define PCIE_PORT_SERVICE_PME 1 /* Power Management Event */ -#define PCIE_PORT_SERVICE_AER 2 /* Advanced Error Reporting */ -#define PCIE_PORT_SERVICE_HP 4 /* Native Hotplug */ -#define PCIE_PORT_SERVICE_VC 8 /* Virtual Channel */ +#define PCIE_PORT_SERVICE_PME_SHIFT 0 /* Power Management Event */ +#define PCIE_PORT_SERVICE_PME (1 << PCIE_PORT_SERVICE_PME_SHIFT) +#define PCIE_PORT_SERVICE_AER_SHIFT 1 /* Advanced Error Reporting */ +#define PCIE_PORT_SERVICE_AER (1 << PCIE_PORT_SERVICE_AER_SHIFT) +#define PCIE_PORT_SERVICE_HP_SHIFT 2 /* Native Hotplug */ +#define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT) +#define PCIE_PORT_SERVICE_VC_SHIFT 3 /* Virtual Channel */ +#define PCIE_PORT_SERVICE_VC (1 << PCIE_PORT_SERVICE_VC_SHIFT) /* Root/Upstream/Downstream Port's Interrupt Mode */ +#define PCIE_PORT_NO_IRQ (-1) #define PCIE_PORT_INTx_MODE 0 #define PCIE_PORT_MSI_MODE 1 #define PCIE_PORT_MSIX_MODE 2 -struct pcie_port_service_id { - __u32 vendor, device; /* Vendor and device ID or PCI_ANY_ID*/ - __u32 subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */ - __u32 class, class_mask; /* (class,subclass,prog-if) triplet */ - __u32 port_type, service_type; /* Port Entity */ - kernel_ulong_t driver_data; +struct pcie_port_data { + int port_type; /* Type of the port */ + int port_irq_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ }; struct pcie_device { int irq; /* Service IRQ/MSI/MSI-X Vector */ - int interrupt_mode; /* [0:INTx | 1:MSI | 2:MSI-X] */ - struct pcie_port_service_id id; /* Service ID */ - struct pci_dev *port; /* Root/Upstream/Downstream Port */ + struct pci_dev *port; /* Root/Upstream/Downstream Port */ + u32 service; /* Port service this device represents */ void *priv_data; /* Service Private Data */ struct device device; /* Generic Device Interface */ }; @@ -56,10 +57,9 @@ static inline void* get_service_data(struct pcie_device *dev) struct pcie_port_service_driver { const char *name; - int (*probe) (struct pcie_device *dev, - const struct pcie_port_service_id *id); + int (*probe) (struct pcie_device *dev); void (*remove) (struct pcie_device *dev); - int (*suspend) (struct pcie_device *dev, pm_message_t state); + int (*suspend) (struct pcie_device *dev); int (*resume) (struct pcie_device *dev); /* Service Error Recovery Handler */ @@ -68,7 +68,9 @@ struct pcie_port_service_driver { /* Link Reset Capability - AER service driver specific */ pci_ers_result_t (*reset_link) (struct pci_dev *dev); - const struct pcie_port_service_id *id_table; + int port_type; /* Type of the port this driver can handle */ + u32 service; /* Port service this device represents */ + struct device_driver driver; }; #define to_service_driver(d) \ diff --git a/include/linux/poison.h b/include/linux/poison.h index 9f31683728fd..6729f7dcd60e 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -17,6 +17,9 @@ */ #define TIMER_ENTRY_STATIC ((void *) 0x74737461) +/********** mm/debug-pagealloc.c **********/ +#define PAGE_POISON 0xaa + /********** mm/slab.c **********/ /* * Magic nums for obj red zoning. diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 4046b75563c1..60f88a7fb13d 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -99,6 +99,7 @@ struct rtc_pll_info { #ifdef __KERNEL__ +#include <linux/types.h> #include <linux/interrupt.h> extern int rtc_month_days(unsigned int month, unsigned int year); @@ -232,6 +233,11 @@ int rtc_register(rtc_task_t *task); int rtc_unregister(rtc_task_t *task); int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg); +static inline bool is_leap_year(unsigned int year) +{ + return (!(year % 4) && (year % 100)) || !(year % 400); +} + #endif /* __KERNEL__ */ #endif /* _LINUX_RTC_H_ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 29df6374d2de..481fad3a9b42 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -391,8 +391,15 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); (mm)->hiwater_vm = (mm)->total_vm; \ } while (0) -#define get_mm_hiwater_rss(mm) max((mm)->hiwater_rss, get_mm_rss(mm)) -#define get_mm_hiwater_vm(mm) max((mm)->hiwater_vm, (mm)->total_vm) +static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm) +{ + return max(mm->hiwater_rss, get_mm_rss(mm)); +} + +static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm) +{ + return max(mm->hiwater_vm, mm->total_vm); +} extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); diff --git a/include/linux/string.h b/include/linux/string.h index d18fc198aa2f..8852739f36df 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -12,6 +12,7 @@ #include <linux/stddef.h> /* for NULL */ extern char *strndup_user(const char __user *, long); +extern void *memdup_user(const void __user *, size_t); /* * Include machine specific inline routines diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3435d24bfe55..d3a4c0231933 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -69,7 +69,6 @@ struct svc_serv { struct list_head sv_tempsocks; /* all temporary sockets */ int sv_tmpcnt; /* count of temporary sockets */ struct timer_list sv_temptimer; /* timer for aging temporary sockets */ - sa_family_t sv_family; /* listener's address family */ char * sv_name; /* service name */ @@ -385,19 +384,19 @@ struct svc_procedure { /* * Function prototypes. */ -struct svc_serv *svc_create(struct svc_program *, unsigned int, sa_family_t, +struct svc_serv *svc_create(struct svc_program *, unsigned int, void (*shutdown)(struct svc_serv *)); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - sa_family_t, void (*shutdown)(struct svc_serv *), + void (*shutdown)(struct svc_serv *), svc_thread_fn, struct module *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); void svc_destroy(struct svc_serv *); int svc_process(struct svc_rqst *); -int svc_register(const struct svc_serv *, const unsigned short, - const unsigned short); +int svc_register(const struct svc_serv *, const int, + const unsigned short, const unsigned short); void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 0127daca4354..0d9cb6ef28b0 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -71,7 +71,8 @@ int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); -int svc_create_xprt(struct svc_serv *, char *, unsigned short, int); +int svc_create_xprt(struct svc_serv *, const char *, const int, + const unsigned short, int); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_received(struct svc_xprt *); void svc_xprt_put(struct svc_xprt *xprt); @@ -80,7 +81,8 @@ void svc_close_xprt(struct svc_xprt *xprt); void svc_delete_xprt(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); -struct svc_xprt *svc_find_xprt(struct svc_serv *, char *, int, int); +struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, + const sa_family_t af, const unsigned short port); int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen); static inline void svc_xprt_get(struct svc_xprt *xprt) @@ -88,29 +90,32 @@ static inline void svc_xprt_get(struct svc_xprt *xprt) kref_get(&xprt->xpt_ref); } static inline void svc_xprt_set_local(struct svc_xprt *xprt, - struct sockaddr *sa, int salen) + const struct sockaddr *sa, + const size_t salen) { memcpy(&xprt->xpt_local, sa, salen); xprt->xpt_locallen = salen; } static inline void svc_xprt_set_remote(struct svc_xprt *xprt, - struct sockaddr *sa, int salen) + const struct sockaddr *sa, + const size_t salen) { memcpy(&xprt->xpt_remote, sa, salen); xprt->xpt_remotelen = salen; } -static inline unsigned short svc_addr_port(struct sockaddr *sa) +static inline unsigned short svc_addr_port(const struct sockaddr *sa) { - unsigned short ret = 0; + const struct sockaddr_in *sin = (const struct sockaddr_in *)sa; + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sa; + switch (sa->sa_family) { case AF_INET: - ret = ntohs(((struct sockaddr_in *)sa)->sin_port); - break; + return ntohs(sin->sin_port); case AF_INET6: - ret = ntohs(((struct sockaddr_in6 *)sa)->sin6_port); - break; + return ntohs(sin6->sin6_port); } - return ret; + + return 0; } static inline size_t svc_addr_len(struct sockaddr *sa) @@ -124,36 +129,39 @@ static inline size_t svc_addr_len(struct sockaddr *sa) return -EAFNOSUPPORT; } -static inline unsigned short svc_xprt_local_port(struct svc_xprt *xprt) +static inline unsigned short svc_xprt_local_port(const struct svc_xprt *xprt) { - return svc_addr_port((struct sockaddr *)&xprt->xpt_local); + return svc_addr_port((const struct sockaddr *)&xprt->xpt_local); } -static inline unsigned short svc_xprt_remote_port(struct svc_xprt *xprt) +static inline unsigned short svc_xprt_remote_port(const struct svc_xprt *xprt) { - return svc_addr_port((struct sockaddr *)&xprt->xpt_remote); + return svc_addr_port((const struct sockaddr *)&xprt->xpt_remote); } -static inline char *__svc_print_addr(struct sockaddr *addr, - char *buf, size_t len) +static inline char *__svc_print_addr(const struct sockaddr *addr, + char *buf, const size_t len) { + const struct sockaddr_in *sin = (const struct sockaddr_in *)addr; + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)addr; + switch (addr->sa_family) { case AF_INET: - snprintf(buf, len, "%pI4, port=%u", - &((struct sockaddr_in *)addr)->sin_addr, - ntohs(((struct sockaddr_in *) addr)->sin_port)); + snprintf(buf, len, "%pI4, port=%u", &sin->sin_addr, + ntohs(sin->sin_port)); break; case AF_INET6: snprintf(buf, len, "%pI6, port=%u", - &((struct sockaddr_in6 *)addr)->sin6_addr, - ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); + &sin6->sin6_addr, + ntohs(sin6->sin6_port)); break; default: snprintf(buf, len, "unknown address type: %d", addr->sa_family); break; } + return buf; } #endif /* SUNRPC_SVC_XPRT_H */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 11fc71d50c1e..1758d9f5b5c3 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -235,6 +235,7 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * */ int xprt_register_transport(struct xprt_class *type); int xprt_unregister_transport(struct xprt_class *type); +int xprt_load_transport(const char *); void xprt_set_retrans_timeout_def(struct rpc_task *task); void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); @@ -259,6 +260,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); #define XPRT_BOUND (4) #define XPRT_BINDING (5) #define XPRT_CLOSING (6) +#define XPRT_CONNECTION_ABORT (7) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/include/linux/suspend.h b/include/linux/suspend.h index c7d9bb1832ba..3e3a4364cbff 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -1,9 +1,6 @@ #ifndef _LINUX_SUSPEND_H #define _LINUX_SUSPEND_H -#if defined(CONFIG_X86) || defined(CONFIG_FRV) || defined(CONFIG_PPC32) || defined(CONFIG_PPC64) -#include <asm/suspend.h> -#endif #include <linux/swap.h> #include <linux/notifier.h> #include <linux/init.h> diff --git a/include/linux/swap.h b/include/linux/swap.h index d30215578877..62d81435347a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -212,7 +212,7 @@ static inline void lru_cache_add_active_file(struct page *page) /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, - gfp_t gfp_mask); + gfp_t gfp_mask, nodemask_t *mask); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, unsigned int swappiness); @@ -382,6 +382,11 @@ static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, return NULL; } +static inline int swap_writepage(struct page *p, struct writeback_control *wbc) +{ + return 0; +} + static inline struct page *lookup_swap_cache(swp_entry_t swp) { return NULL; diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 08e088334dba..8615d661ab60 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -252,8 +252,6 @@ struct tty_operations { void (*set_ldisc)(struct tty_struct *tty); void (*wait_until_sent)(struct tty_struct *tty, int timeout); void (*send_xchar)(struct tty_struct *tty, char ch); - int (*read_proc)(char *page, char **start, off_t off, - int count, int *eof, void *data); int (*tiocmget)(struct tty_struct *tty, struct file *file); int (*tiocmset)(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); @@ -264,6 +262,7 @@ struct tty_operations { int (*poll_get_char)(struct tty_driver *driver, int line); void (*poll_put_char)(struct tty_driver *driver, int line, char ch); #endif + const struct file_operations *proc_fops; }; struct tty_driver { diff --git a/include/linux/wait.h b/include/linux/wait.h index a210ede73b56..5d631c17eaee 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -135,8 +135,11 @@ static inline void __remove_wait_queue(wait_queue_head_t *head, void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int sync, void *key); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); -extern void __wake_up_locked(wait_queue_head_t *q, unsigned int mode); -extern void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); +void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); +void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, + void *key); +void __wake_up_locked(wait_queue_head_t *q, unsigned int mode); +void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); void __wake_up_bit(wait_queue_head_t *, void *, int); int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); @@ -155,21 +158,17 @@ wait_queue_head_t *bit_waitqueue(void *, int); #define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL) #define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE, 1) -#ifdef CONFIG_DEBUG_LOCK_ALLOC /* - * macro to avoid include hell + * Wakeup macros to be used to report events to the targets. */ -#define wake_up_nested(x, s) \ -do { \ - unsigned long flags; \ - \ - spin_lock_irqsave_nested(&(x)->lock, flags, (s)); \ - wake_up_locked(x); \ - spin_unlock_irqrestore(&(x)->lock, flags); \ -} while (0) -#else -#define wake_up_nested(x, s) wake_up(x) -#endif +#define wake_up_poll(x, m) \ + __wake_up(x, TASK_NORMAL, 1, (void *) (m)) +#define wake_up_locked_poll(x, m) \ + __wake_up_locked_key((x), TASK_NORMAL, (void *) (m)) +#define wake_up_interruptible_poll(x, m) \ + __wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m)) +#define wake_up_interruptible_sync_poll(x, m) \ + __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m)) #define __wait_event(wq, condition) \ do { \ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 7300ecdc480c..93445477f86a 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -109,8 +109,8 @@ extern int dirty_background_ratio; extern unsigned long dirty_background_bytes; extern int vm_dirty_ratio; extern unsigned long vm_dirty_bytes; -extern int dirty_writeback_interval; -extern int dirty_expire_interval; +extern unsigned int dirty_writeback_interval; +extern unsigned int dirty_expire_interval; extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; diff --git a/include/video/aty128.h b/include/video/aty128.h index 51ac69f05bdc..f0851e3bb7cc 100644 --- a/include/video/aty128.h +++ b/include/video/aty128.h @@ -415,7 +415,7 @@ #define PWR_MGT_SLOWDOWN_MCLK 0x00002000 #define PMI_PMSCR_REG 0x60 - + /* used by ATI bug fix for hardware ROM */ #define RAGE128_MPP_TB_CONFIG 0x01c0 diff --git a/include/video/cirrus.h b/include/video/cirrus.h index b2776b6c8679..9a5e9ee30782 100644 --- a/include/video/cirrus.h +++ b/include/video/cirrus.h @@ -32,7 +32,6 @@ #define CL_VSSM2 0x3c3 /* Motherboard Sleep */ /*** VGA Sequencer Registers ***/ -#define CL_SEQR0 0x0 /* Reset */ /* the following are from the "extension registers" group */ #define CL_SEQR6 0x6 /* Unlock ALL Extensions */ #define CL_SEQR7 0x7 /* Extended Sequencer Mode */ @@ -71,6 +70,7 @@ #define CL_CRT1B 0x1b /* Extended Display Controls */ #define CL_CRT1C 0x1c /* Sync adjust and genlock register */ #define CL_CRT1D 0x1d /* Overlay Extended Control register */ +#define CL_CRT1E 0x1e /* Another overflow register */ #define CL_CRT25 0x25 /* Part Status Register */ #define CL_CRT27 0x27 /* ID Register */ #define CL_CRT51 0x51 /* P4 disable "flicker fixer" */ diff --git a/include/video/newport.h b/include/video/newport.h index 1f5ebeaa818f..001b935e71c4 100644 --- a/include/video/newport.h +++ b/include/video/newport.h @@ -453,7 +453,7 @@ static __inline__ int newport_wait(struct newport_regs *regs) { int t = BUSY_TIMEOUT; - while (t--) + while (--t) if (!(regs->cset.status & NPORT_STAT_GBUSY)) break; return !t; @@ -463,7 +463,7 @@ static __inline__ int newport_bfwait(struct newport_regs *regs) { int t = BUSY_TIMEOUT; - while (t--) + while (--t) if(!(regs->cset.status & NPORT_STAT_BBUSY)) break; return !t; diff --git a/include/video/radeon.h b/include/video/radeon.h index e072b16b39ab..56b188abfb54 100644 --- a/include/video/radeon.h +++ b/include/video/radeon.h @@ -5,12 +5,12 @@ #define RADEON_REGSIZE 0x4000 -#define MM_INDEX 0x0000 -#define MM_DATA 0x0004 -#define BUS_CNTL 0x0030 -#define HI_STAT 0x004C +#define MM_INDEX 0x0000 +#define MM_DATA 0x0004 +#define BUS_CNTL 0x0030 +#define HI_STAT 0x004C #define BUS_CNTL1 0x0034 -#define I2C_CNTL_1 0x0094 +#define I2C_CNTL_1 0x0094 #define CNFG_CNTL 0x00E0 #define CNFG_MEMSIZE 0x00F8 #define CNFG_APER_0_BASE 0x0100 @@ -18,8 +18,8 @@ #define CNFG_APER_SIZE 0x0108 #define CNFG_REG_1_BASE 0x010C #define CNFG_REG_APER_SIZE 0x0110 -#define PAD_AGPINPUT_DELAY 0x0164 -#define PAD_CTLR_STRENGTH 0x0168 +#define PAD_AGPINPUT_DELAY 0x0164 +#define PAD_CTLR_STRENGTH 0x0168 #define PAD_CTLR_UPDATE 0x016C #define PAD_CTLR_MISC 0x0aa0 #define AGP_CNTL 0x0174 @@ -27,171 +27,171 @@ #define CAP0_TRIG_CNTL 0x0950 #define CAP1_TRIG_CNTL 0x09c0 #define VIPH_CONTROL 0x0C40 -#define VENDOR_ID 0x0F00 -#define DEVICE_ID 0x0F02 -#define COMMAND 0x0F04 -#define STATUS 0x0F06 -#define REVISION_ID 0x0F08 -#define REGPROG_INF 0x0F09 -#define SUB_CLASS 0x0F0A -#define BASE_CODE 0x0F0B -#define CACHE_LINE 0x0F0C -#define LATENCY 0x0F0D -#define HEADER 0x0F0E -#define BIST 0x0F0F -#define REG_MEM_BASE 0x0F10 -#define REG_IO_BASE 0x0F14 +#define VENDOR_ID 0x0F00 +#define DEVICE_ID 0x0F02 +#define COMMAND 0x0F04 +#define STATUS 0x0F06 +#define REVISION_ID 0x0F08 +#define REGPROG_INF 0x0F09 +#define SUB_CLASS 0x0F0A +#define BASE_CODE 0x0F0B +#define CACHE_LINE 0x0F0C +#define LATENCY 0x0F0D +#define HEADER 0x0F0E +#define BIST 0x0F0F +#define REG_MEM_BASE 0x0F10 +#define REG_IO_BASE 0x0F14 #define REG_REG_BASE 0x0F18 #define ADAPTER_ID 0x0F2C #define BIOS_ROM 0x0F30 -#define CAPABILITIES_PTR 0x0F34 -#define INTERRUPT_LINE 0x0F3C -#define INTERRUPT_PIN 0x0F3D -#define MIN_GRANT 0x0F3E -#define MAX_LATENCY 0x0F3F -#define ADAPTER_ID_W 0x0F4C -#define PMI_CAP_ID 0x0F50 -#define PMI_NXT_CAP_PTR 0x0F51 -#define PMI_PMC_REG 0x0F52 -#define PM_STATUS 0x0F54 -#define PMI_DATA 0x0F57 -#define AGP_CAP_ID 0x0F58 -#define AGP_STATUS 0x0F5C -#define AGP_COMMAND 0x0F60 +#define CAPABILITIES_PTR 0x0F34 +#define INTERRUPT_LINE 0x0F3C +#define INTERRUPT_PIN 0x0F3D +#define MIN_GRANT 0x0F3E +#define MAX_LATENCY 0x0F3F +#define ADAPTER_ID_W 0x0F4C +#define PMI_CAP_ID 0x0F50 +#define PMI_NXT_CAP_PTR 0x0F51 +#define PMI_PMC_REG 0x0F52 +#define PM_STATUS 0x0F54 +#define PMI_DATA 0x0F57 +#define AGP_CAP_ID 0x0F58 +#define AGP_STATUS 0x0F5C +#define AGP_COMMAND 0x0F60 #define AIC_CTRL 0x01D0 #define AIC_STAT 0x01D4 #define AIC_PT_BASE 0x01D8 -#define AIC_LO_ADDR 0x01DC -#define AIC_HI_ADDR 0x01E0 -#define AIC_TLB_ADDR 0x01E4 -#define AIC_TLB_DATA 0x01E8 -#define DAC_CNTL 0x0058 +#define AIC_LO_ADDR 0x01DC +#define AIC_HI_ADDR 0x01E0 +#define AIC_TLB_ADDR 0x01E4 +#define AIC_TLB_DATA 0x01E8 +#define DAC_CNTL 0x0058 #define DAC_CNTL2 0x007c -#define CRTC_GEN_CNTL 0x0050 -#define MEM_CNTL 0x0140 +#define CRTC_GEN_CNTL 0x0050 +#define MEM_CNTL 0x0140 #define MC_CNTL 0x0140 -#define EXT_MEM_CNTL 0x0144 +#define EXT_MEM_CNTL 0x0144 #define MC_TIMING_CNTL 0x0144 -#define MC_AGP_LOCATION 0x014C -#define MEM_IO_CNTL_A0 0x0178 +#define MC_AGP_LOCATION 0x014C +#define MEM_IO_CNTL_A0 0x0178 #define MEM_REFRESH_CNTL 0x0178 -#define MEM_INIT_LATENCY_TIMER 0x0154 +#define MEM_INIT_LATENCY_TIMER 0x0154 #define MC_INIT_GFX_LAT_TIMER 0x0154 -#define MEM_SDRAM_MODE_REG 0x0158 -#define AGP_BASE 0x0170 -#define MEM_IO_CNTL_A1 0x017C +#define MEM_SDRAM_MODE_REG 0x0158 +#define AGP_BASE 0x0170 +#define MEM_IO_CNTL_A1 0x017C #define MC_READ_CNTL_AB 0x017C #define MEM_IO_CNTL_B0 0x0180 #define MC_INIT_MISC_LAT_TIMER 0x0180 #define MEM_IO_CNTL_B1 0x0184 #define MC_IOPAD_CNTL 0x0184 #define MC_DEBUG 0x0188 -#define MC_STATUS 0x0150 -#define MEM_IO_OE_CNTL 0x018C +#define MC_STATUS 0x0150 +#define MEM_IO_OE_CNTL 0x018C #define MC_CHIP_IO_OE_CNTL_AB 0x018C -#define MC_FB_LOCATION 0x0148 -#define HOST_PATH_CNTL 0x0130 -#define MEM_VGA_WP_SEL 0x0038 -#define MEM_VGA_RP_SEL 0x003C -#define HDP_DEBUG 0x0138 +#define MC_FB_LOCATION 0x0148 +#define HOST_PATH_CNTL 0x0130 +#define MEM_VGA_WP_SEL 0x0038 +#define MEM_VGA_RP_SEL 0x003C +#define HDP_DEBUG 0x0138 #define SW_SEMAPHORE 0x013C -#define CRTC2_GEN_CNTL 0x03f8 +#define CRTC2_GEN_CNTL 0x03f8 #define CRTC2_DISPLAY_BASE_ADDR 0x033c -#define SURFACE_CNTL 0x0B00 -#define SURFACE0_LOWER_BOUND 0x0B04 -#define SURFACE1_LOWER_BOUND 0x0B14 -#define SURFACE2_LOWER_BOUND 0x0B24 -#define SURFACE3_LOWER_BOUND 0x0B34 -#define SURFACE4_LOWER_BOUND 0x0B44 +#define SURFACE_CNTL 0x0B00 +#define SURFACE0_LOWER_BOUND 0x0B04 +#define SURFACE1_LOWER_BOUND 0x0B14 +#define SURFACE2_LOWER_BOUND 0x0B24 +#define SURFACE3_LOWER_BOUND 0x0B34 +#define SURFACE4_LOWER_BOUND 0x0B44 #define SURFACE5_LOWER_BOUND 0x0B54 #define SURFACE6_LOWER_BOUND 0x0B64 #define SURFACE7_LOWER_BOUND 0x0B74 -#define SURFACE0_UPPER_BOUND 0x0B08 -#define SURFACE1_UPPER_BOUND 0x0B18 -#define SURFACE2_UPPER_BOUND 0x0B28 -#define SURFACE3_UPPER_BOUND 0x0B38 -#define SURFACE4_UPPER_BOUND 0x0B48 -#define SURFACE5_UPPER_BOUND 0x0B58 -#define SURFACE6_UPPER_BOUND 0x0B68 -#define SURFACE7_UPPER_BOUND 0x0B78 -#define SURFACE0_INFO 0x0B0C -#define SURFACE1_INFO 0x0B1C -#define SURFACE2_INFO 0x0B2C -#define SURFACE3_INFO 0x0B3C -#define SURFACE4_INFO 0x0B4C -#define SURFACE5_INFO 0x0B5C +#define SURFACE0_UPPER_BOUND 0x0B08 +#define SURFACE1_UPPER_BOUND 0x0B18 +#define SURFACE2_UPPER_BOUND 0x0B28 +#define SURFACE3_UPPER_BOUND 0x0B38 +#define SURFACE4_UPPER_BOUND 0x0B48 +#define SURFACE5_UPPER_BOUND 0x0B58 +#define SURFACE6_UPPER_BOUND 0x0B68 +#define SURFACE7_UPPER_BOUND 0x0B78 +#define SURFACE0_INFO 0x0B0C +#define SURFACE1_INFO 0x0B1C +#define SURFACE2_INFO 0x0B2C +#define SURFACE3_INFO 0x0B3C +#define SURFACE4_INFO 0x0B4C +#define SURFACE5_INFO 0x0B5C #define SURFACE6_INFO 0x0B6C #define SURFACE7_INFO 0x0B7C #define SURFACE_ACCESS_FLAGS 0x0BF8 -#define SURFACE_ACCESS_CLR 0x0BFC -#define GEN_INT_CNTL 0x0040 -#define GEN_INT_STATUS 0x0044 +#define SURFACE_ACCESS_CLR 0x0BFC +#define GEN_INT_CNTL 0x0040 +#define GEN_INT_STATUS 0x0044 #define CRTC_EXT_CNTL 0x0054 -#define RB3D_CNTL 0x1C3C -#define WAIT_UNTIL 0x1720 -#define ISYNC_CNTL 0x1724 -#define RBBM_GUICNTL 0x172C -#define RBBM_STATUS 0x0E40 -#define RBBM_STATUS_alt_1 0x1740 -#define RBBM_CNTL 0x00EC -#define RBBM_CNTL_alt_1 0x0E44 -#define RBBM_SOFT_RESET 0x00F0 -#define RBBM_SOFT_RESET_alt_1 0x0E48 -#define NQWAIT_UNTIL 0x0E50 +#define RB3D_CNTL 0x1C3C +#define WAIT_UNTIL 0x1720 +#define ISYNC_CNTL 0x1724 +#define RBBM_GUICNTL 0x172C +#define RBBM_STATUS 0x0E40 +#define RBBM_STATUS_alt_1 0x1740 +#define RBBM_CNTL 0x00EC +#define RBBM_CNTL_alt_1 0x0E44 +#define RBBM_SOFT_RESET 0x00F0 +#define RBBM_SOFT_RESET_alt_1 0x0E48 +#define NQWAIT_UNTIL 0x0E50 #define RBBM_DEBUG 0x0E6C #define RBBM_CMDFIFO_ADDR 0x0E70 #define RBBM_CMDFIFO_DATAL 0x0E74 -#define RBBM_CMDFIFO_DATAH 0x0E78 -#define RBBM_CMDFIFO_STAT 0x0E7C -#define CRTC_STATUS 0x005C -#define GPIO_VGA_DDC 0x0060 -#define GPIO_DVI_DDC 0x0064 -#define GPIO_MONID 0x0068 +#define RBBM_CMDFIFO_DATAH 0x0E78 +#define RBBM_CMDFIFO_STAT 0x0E7C +#define CRTC_STATUS 0x005C +#define GPIO_VGA_DDC 0x0060 +#define GPIO_DVI_DDC 0x0064 +#define GPIO_MONID 0x0068 #define GPIO_CRT2_DDC 0x006c -#define PALETTE_INDEX 0x00B0 -#define PALETTE_DATA 0x00B4 -#define PALETTE_30_DATA 0x00B8 -#define CRTC_H_TOTAL_DISP 0x0200 -#define CRTC_H_SYNC_STRT_WID 0x0204 -#define CRTC_V_TOTAL_DISP 0x0208 -#define CRTC_V_SYNC_STRT_WID 0x020C -#define CRTC_VLINE_CRNT_VLINE 0x0210 +#define PALETTE_INDEX 0x00B0 +#define PALETTE_DATA 0x00B4 +#define PALETTE_30_DATA 0x00B8 +#define CRTC_H_TOTAL_DISP 0x0200 +#define CRTC_H_SYNC_STRT_WID 0x0204 +#define CRTC_V_TOTAL_DISP 0x0208 +#define CRTC_V_SYNC_STRT_WID 0x020C +#define CRTC_VLINE_CRNT_VLINE 0x0210 #define CRTC_CRNT_FRAME 0x0214 #define CRTC_GUI_TRIG_VLINE 0x0218 #define CRTC_DEBUG 0x021C -#define CRTC_OFFSET_RIGHT 0x0220 -#define CRTC_OFFSET 0x0224 -#define CRTC_OFFSET_CNTL 0x0228 -#define CRTC_PITCH 0x022C -#define OVR_CLR 0x0230 -#define OVR_WID_LEFT_RIGHT 0x0234 -#define OVR_WID_TOP_BOTTOM 0x0238 -#define DISPLAY_BASE_ADDR 0x023C -#define SNAPSHOT_VH_COUNTS 0x0240 -#define SNAPSHOT_F_COUNT 0x0244 -#define N_VIF_COUNT 0x0248 -#define SNAPSHOT_VIF_COUNT 0x024C -#define FP_CRTC_H_TOTAL_DISP 0x0250 -#define FP_CRTC_V_TOTAL_DISP 0x0254 +#define CRTC_OFFSET_RIGHT 0x0220 +#define CRTC_OFFSET 0x0224 +#define CRTC_OFFSET_CNTL 0x0228 +#define CRTC_PITCH 0x022C +#define OVR_CLR 0x0230 +#define OVR_WID_LEFT_RIGHT 0x0234 +#define OVR_WID_TOP_BOTTOM 0x0238 +#define DISPLAY_BASE_ADDR 0x023C +#define SNAPSHOT_VH_COUNTS 0x0240 +#define SNAPSHOT_F_COUNT 0x0244 +#define N_VIF_COUNT 0x0248 +#define SNAPSHOT_VIF_COUNT 0x024C +#define FP_CRTC_H_TOTAL_DISP 0x0250 +#define FP_CRTC_V_TOTAL_DISP 0x0254 #define CRT_CRTC_H_SYNC_STRT_WID 0x0258 #define CRT_CRTC_V_SYNC_STRT_WID 0x025C #define CUR_OFFSET 0x0260 -#define CUR_HORZ_VERT_POSN 0x0264 -#define CUR_HORZ_VERT_OFF 0x0268 -#define CUR_CLR0 0x026C -#define CUR_CLR1 0x0270 -#define FP_HORZ_VERT_ACTIVE 0x0278 -#define CRTC_MORE_CNTL 0x027C +#define CUR_HORZ_VERT_POSN 0x0264 +#define CUR_HORZ_VERT_OFF 0x0268 +#define CUR_CLR0 0x026C +#define CUR_CLR1 0x0270 +#define FP_HORZ_VERT_ACTIVE 0x0278 +#define CRTC_MORE_CNTL 0x027C #define CRTC_H_CUTOFF_ACTIVE_EN (1<<4) #define CRTC_V_CUTOFF_ACTIVE_EN (1<<5) -#define DAC_EXT_CNTL 0x0280 -#define FP_GEN_CNTL 0x0284 -#define FP_HORZ_STRETCH 0x028C -#define FP_VERT_STRETCH 0x0290 -#define FP_H_SYNC_STRT_WID 0x02C4 -#define FP_V_SYNC_STRT_WID 0x02C8 -#define AUX_WINDOW_HORZ_CNTL 0x02D8 -#define AUX_WINDOW_VERT_CNTL 0x02DC +#define DAC_EXT_CNTL 0x0280 +#define FP_GEN_CNTL 0x0284 +#define FP_HORZ_STRETCH 0x028C +#define FP_VERT_STRETCH 0x0290 +#define FP_H_SYNC_STRT_WID 0x02C4 +#define FP_V_SYNC_STRT_WID 0x02C8 +#define AUX_WINDOW_HORZ_CNTL 0x02D8 +#define AUX_WINDOW_VERT_CNTL 0x02DC //#define DDA_CONFIG 0x02e0 //#define DDA_ON_OFF 0x02e4 #define DVI_I2C_CNTL_1 0x02e4 @@ -199,192 +199,192 @@ #define GRPH2_BUFFER_CNTL 0x03F0 #define VGA_BUFFER_CNTL 0x02F4 #define OV0_Y_X_START 0x0400 -#define OV0_Y_X_END 0x0404 -#define OV0_PIPELINE_CNTL 0x0408 -#define OV0_REG_LOAD_CNTL 0x0410 -#define OV0_SCALE_CNTL 0x0420 -#define OV0_V_INC 0x0424 -#define OV0_P1_V_ACCUM_INIT 0x0428 -#define OV0_P23_V_ACCUM_INIT 0x042C -#define OV0_P1_BLANK_LINES_AT_TOP 0x0430 -#define OV0_P23_BLANK_LINES_AT_TOP 0x0434 -#define OV0_BASE_ADDR 0x043C -#define OV0_VID_BUF0_BASE_ADRS 0x0440 -#define OV0_VID_BUF1_BASE_ADRS 0x0444 -#define OV0_VID_BUF2_BASE_ADRS 0x0448 -#define OV0_VID_BUF3_BASE_ADRS 0x044C +#define OV0_Y_X_END 0x0404 +#define OV0_PIPELINE_CNTL 0x0408 +#define OV0_REG_LOAD_CNTL 0x0410 +#define OV0_SCALE_CNTL 0x0420 +#define OV0_V_INC 0x0424 +#define OV0_P1_V_ACCUM_INIT 0x0428 +#define OV0_P23_V_ACCUM_INIT 0x042C +#define OV0_P1_BLANK_LINES_AT_TOP 0x0430 +#define OV0_P23_BLANK_LINES_AT_TOP 0x0434 +#define OV0_BASE_ADDR 0x043C +#define OV0_VID_BUF0_BASE_ADRS 0x0440 +#define OV0_VID_BUF1_BASE_ADRS 0x0444 +#define OV0_VID_BUF2_BASE_ADRS 0x0448 +#define OV0_VID_BUF3_BASE_ADRS 0x044C #define OV0_VID_BUF4_BASE_ADRS 0x0450 #define OV0_VID_BUF5_BASE_ADRS 0x0454 #define OV0_VID_BUF_PITCH0_VALUE 0x0460 -#define OV0_VID_BUF_PITCH1_VALUE 0x0464 -#define OV0_AUTO_FLIP_CNTRL 0x0470 -#define OV0_DEINTERLACE_PATTERN 0x0474 -#define OV0_SUBMIT_HISTORY 0x0478 -#define OV0_H_INC 0x0480 -#define OV0_STEP_BY 0x0484 -#define OV0_P1_H_ACCUM_INIT 0x0488 -#define OV0_P23_H_ACCUM_INIT 0x048C -#define OV0_P1_X_START_END 0x0494 -#define OV0_P2_X_START_END 0x0498 -#define OV0_P3_X_START_END 0x049C -#define OV0_FILTER_CNTL 0x04A0 -#define OV0_FOUR_TAP_COEF_0 0x04B0 -#define OV0_FOUR_TAP_COEF_1 0x04B4 +#define OV0_VID_BUF_PITCH1_VALUE 0x0464 +#define OV0_AUTO_FLIP_CNTRL 0x0470 +#define OV0_DEINTERLACE_PATTERN 0x0474 +#define OV0_SUBMIT_HISTORY 0x0478 +#define OV0_H_INC 0x0480 +#define OV0_STEP_BY 0x0484 +#define OV0_P1_H_ACCUM_INIT 0x0488 +#define OV0_P23_H_ACCUM_INIT 0x048C +#define OV0_P1_X_START_END 0x0494 +#define OV0_P2_X_START_END 0x0498 +#define OV0_P3_X_START_END 0x049C +#define OV0_FILTER_CNTL 0x04A0 +#define OV0_FOUR_TAP_COEF_0 0x04B0 +#define OV0_FOUR_TAP_COEF_1 0x04B4 #define OV0_FOUR_TAP_COEF_2 0x04B8 #define OV0_FOUR_TAP_COEF_3 0x04BC #define OV0_FOUR_TAP_COEF_4 0x04C0 -#define OV0_FLAG_CNTRL 0x04DC -#define OV0_SLICE_CNTL 0x04E0 -#define OV0_VID_KEY_CLR_LOW 0x04E4 -#define OV0_VID_KEY_CLR_HIGH 0x04E8 -#define OV0_GRPH_KEY_CLR_LOW 0x04EC -#define OV0_GRPH_KEY_CLR_HIGH 0x04F0 -#define OV0_KEY_CNTL 0x04F4 -#define OV0_TEST 0x04F8 -#define SUBPIC_CNTL 0x0540 -#define SUBPIC_DEFCOLCON 0x0544 -#define SUBPIC_Y_X_START 0x054C -#define SUBPIC_Y_X_END 0x0550 -#define SUBPIC_V_INC 0x0554 -#define SUBPIC_H_INC 0x0558 +#define OV0_FLAG_CNTRL 0x04DC +#define OV0_SLICE_CNTL 0x04E0 +#define OV0_VID_KEY_CLR_LOW 0x04E4 +#define OV0_VID_KEY_CLR_HIGH 0x04E8 +#define OV0_GRPH_KEY_CLR_LOW 0x04EC +#define OV0_GRPH_KEY_CLR_HIGH 0x04F0 +#define OV0_KEY_CNTL 0x04F4 +#define OV0_TEST 0x04F8 +#define SUBPIC_CNTL 0x0540 +#define SUBPIC_DEFCOLCON 0x0544 +#define SUBPIC_Y_X_START 0x054C +#define SUBPIC_Y_X_END 0x0550 +#define SUBPIC_V_INC 0x0554 +#define SUBPIC_H_INC 0x0558 #define SUBPIC_BUF0_OFFSET 0x055C #define SUBPIC_BUF1_OFFSET 0x0560 #define SUBPIC_LC0_OFFSET 0x0564 -#define SUBPIC_LC1_OFFSET 0x0568 -#define SUBPIC_PITCH 0x056C -#define SUBPIC_BTN_HLI_COLCON 0x0570 -#define SUBPIC_BTN_HLI_Y_X_START 0x0574 -#define SUBPIC_BTN_HLI_Y_X_END 0x0578 -#define SUBPIC_PALETTE_INDEX 0x057C -#define SUBPIC_PALETTE_DATA 0x0580 -#define SUBPIC_H_ACCUM_INIT 0x0584 -#define SUBPIC_V_ACCUM_INIT 0x0588 -#define DISP_MISC_CNTL 0x0D00 -#define DAC_MACRO_CNTL 0x0D04 -#define DISP_PWR_MAN 0x0D08 -#define DISP_TEST_DEBUG_CNTL 0x0D10 -#define DISP_HW_DEBUG 0x0D14 +#define SUBPIC_LC1_OFFSET 0x0568 +#define SUBPIC_PITCH 0x056C +#define SUBPIC_BTN_HLI_COLCON 0x0570 +#define SUBPIC_BTN_HLI_Y_X_START 0x0574 +#define SUBPIC_BTN_HLI_Y_X_END 0x0578 +#define SUBPIC_PALETTE_INDEX 0x057C +#define SUBPIC_PALETTE_DATA 0x0580 +#define SUBPIC_H_ACCUM_INIT 0x0584 +#define SUBPIC_V_ACCUM_INIT 0x0588 +#define DISP_MISC_CNTL 0x0D00 +#define DAC_MACRO_CNTL 0x0D04 +#define DISP_PWR_MAN 0x0D08 +#define DISP_TEST_DEBUG_CNTL 0x0D10 +#define DISP_HW_DEBUG 0x0D14 #define DAC_CRC_SIG1 0x0D18 #define DAC_CRC_SIG2 0x0D1C #define OV0_LIN_TRANS_A 0x0D20 -#define OV0_LIN_TRANS_B 0x0D24 -#define OV0_LIN_TRANS_C 0x0D28 -#define OV0_LIN_TRANS_D 0x0D2C -#define OV0_LIN_TRANS_E 0x0D30 -#define OV0_LIN_TRANS_F 0x0D34 -#define OV0_GAMMA_0_F 0x0D40 -#define OV0_GAMMA_10_1F 0x0D44 -#define OV0_GAMMA_20_3F 0x0D48 -#define OV0_GAMMA_40_7F 0x0D4C -#define OV0_GAMMA_380_3BF 0x0D50 -#define OV0_GAMMA_3C0_3FF 0x0D54 -#define DISP_MERGE_CNTL 0x0D60 -#define DISP_OUTPUT_CNTL 0x0D64 -#define DISP_LIN_TRANS_GRPH_A 0x0D80 +#define OV0_LIN_TRANS_B 0x0D24 +#define OV0_LIN_TRANS_C 0x0D28 +#define OV0_LIN_TRANS_D 0x0D2C +#define OV0_LIN_TRANS_E 0x0D30 +#define OV0_LIN_TRANS_F 0x0D34 +#define OV0_GAMMA_0_F 0x0D40 +#define OV0_GAMMA_10_1F 0x0D44 +#define OV0_GAMMA_20_3F 0x0D48 +#define OV0_GAMMA_40_7F 0x0D4C +#define OV0_GAMMA_380_3BF 0x0D50 +#define OV0_GAMMA_3C0_3FF 0x0D54 +#define DISP_MERGE_CNTL 0x0D60 +#define DISP_OUTPUT_CNTL 0x0D64 +#define DISP_LIN_TRANS_GRPH_A 0x0D80 #define DISP_LIN_TRANS_GRPH_B 0x0D84 #define DISP_LIN_TRANS_GRPH_C 0x0D88 #define DISP_LIN_TRANS_GRPH_D 0x0D8C -#define DISP_LIN_TRANS_GRPH_E 0x0D90 -#define DISP_LIN_TRANS_GRPH_F 0x0D94 -#define DISP_LIN_TRANS_VID_A 0x0D98 -#define DISP_LIN_TRANS_VID_B 0x0D9C -#define DISP_LIN_TRANS_VID_C 0x0DA0 -#define DISP_LIN_TRANS_VID_D 0x0DA4 -#define DISP_LIN_TRANS_VID_E 0x0DA8 -#define DISP_LIN_TRANS_VID_F 0x0DAC -#define RMX_HORZ_FILTER_0TAP_COEF 0x0DB0 -#define RMX_HORZ_FILTER_1TAP_COEF 0x0DB4 -#define RMX_HORZ_FILTER_2TAP_COEF 0x0DB8 -#define RMX_HORZ_PHASE 0x0DBC -#define DAC_EMBEDDED_SYNC_CNTL 0x0DC0 -#define DAC_BROAD_PULSE 0x0DC4 +#define DISP_LIN_TRANS_GRPH_E 0x0D90 +#define DISP_LIN_TRANS_GRPH_F 0x0D94 +#define DISP_LIN_TRANS_VID_A 0x0D98 +#define DISP_LIN_TRANS_VID_B 0x0D9C +#define DISP_LIN_TRANS_VID_C 0x0DA0 +#define DISP_LIN_TRANS_VID_D 0x0DA4 +#define DISP_LIN_TRANS_VID_E 0x0DA8 +#define DISP_LIN_TRANS_VID_F 0x0DAC +#define RMX_HORZ_FILTER_0TAP_COEF 0x0DB0 +#define RMX_HORZ_FILTER_1TAP_COEF 0x0DB4 +#define RMX_HORZ_FILTER_2TAP_COEF 0x0DB8 +#define RMX_HORZ_PHASE 0x0DBC +#define DAC_EMBEDDED_SYNC_CNTL 0x0DC0 +#define DAC_BROAD_PULSE 0x0DC4 #define DAC_SKEW_CLKS 0x0DC8 #define DAC_INCR 0x0DCC #define DAC_NEG_SYNC_LEVEL 0x0DD0 -#define DAC_POS_SYNC_LEVEL 0x0DD4 -#define DAC_BLANK_LEVEL 0x0DD8 -#define CLOCK_CNTL_INDEX 0x0008 -#define CLOCK_CNTL_DATA 0x000C -#define CP_RB_CNTL 0x0704 -#define CP_RB_BASE 0x0700 -#define CP_RB_RPTR_ADDR 0x070C -#define CP_RB_RPTR 0x0710 -#define CP_RB_WPTR 0x0714 -#define CP_RB_WPTR_DELAY 0x0718 -#define CP_IB_BASE 0x0738 -#define CP_IB_BUFSZ 0x073C -#define SCRATCH_REG0 0x15E0 -#define GUI_SCRATCH_REG0 0x15E0 -#define SCRATCH_REG1 0x15E4 -#define GUI_SCRATCH_REG1 0x15E4 +#define DAC_POS_SYNC_LEVEL 0x0DD4 +#define DAC_BLANK_LEVEL 0x0DD8 +#define CLOCK_CNTL_INDEX 0x0008 +#define CLOCK_CNTL_DATA 0x000C +#define CP_RB_CNTL 0x0704 +#define CP_RB_BASE 0x0700 +#define CP_RB_RPTR_ADDR 0x070C +#define CP_RB_RPTR 0x0710 +#define CP_RB_WPTR 0x0714 +#define CP_RB_WPTR_DELAY 0x0718 +#define CP_IB_BASE 0x0738 +#define CP_IB_BUFSZ 0x073C +#define SCRATCH_REG0 0x15E0 +#define GUI_SCRATCH_REG0 0x15E0 +#define SCRATCH_REG1 0x15E4 +#define GUI_SCRATCH_REG1 0x15E4 #define SCRATCH_REG2 0x15E8 #define GUI_SCRATCH_REG2 0x15E8 #define SCRATCH_REG3 0x15EC -#define GUI_SCRATCH_REG3 0x15EC -#define SCRATCH_REG4 0x15F0 -#define GUI_SCRATCH_REG4 0x15F0 -#define SCRATCH_REG5 0x15F4 -#define GUI_SCRATCH_REG5 0x15F4 -#define SCRATCH_UMSK 0x0770 -#define SCRATCH_ADDR 0x0774 -#define DP_BRUSH_FRGD_CLR 0x147C +#define GUI_SCRATCH_REG3 0x15EC +#define SCRATCH_REG4 0x15F0 +#define GUI_SCRATCH_REG4 0x15F0 +#define SCRATCH_REG5 0x15F4 +#define GUI_SCRATCH_REG5 0x15F4 +#define SCRATCH_UMSK 0x0770 +#define SCRATCH_ADDR 0x0774 +#define DP_BRUSH_FRGD_CLR 0x147C #define DP_BRUSH_BKGD_CLR 0x1478 #define DST_LINE_START 0x1600 -#define DST_LINE_END 0x1604 -#define SRC_OFFSET 0x15AC +#define DST_LINE_END 0x1604 +#define SRC_OFFSET 0x15AC #define SRC_PITCH 0x15B0 #define SRC_TILE 0x1704 #define SRC_PITCH_OFFSET 0x1428 -#define SRC_X 0x1414 -#define SRC_Y 0x1418 -#define SRC_X_Y 0x1590 -#define SRC_Y_X 0x1434 +#define SRC_X 0x1414 +#define SRC_Y 0x1418 +#define SRC_X_Y 0x1590 +#define SRC_Y_X 0x1434 #define DST_Y_X 0x1438 #define DST_WIDTH_HEIGHT 0x1598 #define DST_HEIGHT_WIDTH 0x143c #define DST_OFFSET 0x1404 -#define SRC_CLUT_ADDRESS 0x1780 -#define SRC_CLUT_DATA 0x1784 -#define SRC_CLUT_DATA_RD 0x1788 -#define HOST_DATA0 0x17C0 -#define HOST_DATA1 0x17C4 -#define HOST_DATA2 0x17C8 -#define HOST_DATA3 0x17CC -#define HOST_DATA4 0x17D0 -#define HOST_DATA5 0x17D4 -#define HOST_DATA6 0x17D8 +#define SRC_CLUT_ADDRESS 0x1780 +#define SRC_CLUT_DATA 0x1784 +#define SRC_CLUT_DATA_RD 0x1788 +#define HOST_DATA0 0x17C0 +#define HOST_DATA1 0x17C4 +#define HOST_DATA2 0x17C8 +#define HOST_DATA3 0x17CC +#define HOST_DATA4 0x17D0 +#define HOST_DATA5 0x17D4 +#define HOST_DATA6 0x17D8 #define HOST_DATA7 0x17DC #define HOST_DATA_LAST 0x17E0 #define DP_SRC_ENDIAN 0x15D4 -#define DP_SRC_FRGD_CLR 0x15D8 -#define DP_SRC_BKGD_CLR 0x15DC -#define SC_LEFT 0x1640 -#define SC_RIGHT 0x1644 -#define SC_TOP 0x1648 -#define SC_BOTTOM 0x164C -#define SRC_SC_RIGHT 0x1654 -#define SRC_SC_BOTTOM 0x165C -#define DP_CNTL 0x16C0 -#define DP_CNTL_XDIR_YDIR_YMAJOR 0x16D0 -#define DP_DATATYPE 0x16C4 -#define DP_MIX 0x16C8 -#define DP_WRITE_MSK 0x16CC -#define DP_XOP 0x17F8 +#define DP_SRC_FRGD_CLR 0x15D8 +#define DP_SRC_BKGD_CLR 0x15DC +#define SC_LEFT 0x1640 +#define SC_RIGHT 0x1644 +#define SC_TOP 0x1648 +#define SC_BOTTOM 0x164C +#define SRC_SC_RIGHT 0x1654 +#define SRC_SC_BOTTOM 0x165C +#define DP_CNTL 0x16C0 +#define DP_CNTL_XDIR_YDIR_YMAJOR 0x16D0 +#define DP_DATATYPE 0x16C4 +#define DP_MIX 0x16C8 +#define DP_WRITE_MSK 0x16CC +#define DP_XOP 0x17F8 #define CLR_CMP_CLR_SRC 0x15C4 #define CLR_CMP_CLR_DST 0x15C8 #define CLR_CMP_CNTL 0x15C0 -#define CLR_CMP_MSK 0x15CC -#define DSTCACHE_MODE 0x1710 -#define DSTCACHE_CTLSTAT 0x1714 -#define DEFAULT_PITCH_OFFSET 0x16E0 -#define DEFAULT_SC_BOTTOM_RIGHT 0x16E8 +#define CLR_CMP_MSK 0x15CC +#define DSTCACHE_MODE 0x1710 +#define DSTCACHE_CTLSTAT 0x1714 +#define DEFAULT_PITCH_OFFSET 0x16E0 +#define DEFAULT_SC_BOTTOM_RIGHT 0x16E8 #define DEFAULT_SC_TOP_LEFT 0x16EC #define SRC_PITCH_OFFSET 0x1428 #define DST_PITCH_OFFSET 0x142C -#define DP_GUI_MASTER_CNTL 0x146C -#define SC_TOP_LEFT 0x16EC -#define SC_BOTTOM_RIGHT 0x16F0 -#define SRC_SC_BOTTOM_RIGHT 0x16F4 +#define DP_GUI_MASTER_CNTL 0x146C +#define SC_TOP_LEFT 0x16EC +#define SC_BOTTOM_RIGHT 0x16F0 +#define SRC_SC_BOTTOM_RIGHT 0x16F4 #define RB2D_DSTCACHE_MODE 0x3428 #define RB2D_DSTCACHE_CTLSTAT_broken 0x342C /* do not use */ #define LVDS_GEN_CNTL 0x02d0 @@ -686,7 +686,7 @@ #define VERT_FP_LOOP_STRETCH (0x7 << 28) #define VERT_STRETCH_RESERVED 0xf1000000 -/* DAC_CNTL bit constants */ +/* DAC_CNTL bit constants */ #define DAC_8BIT_EN 0x00000100 #define DAC_4BPP_PIX_ORDER 0x00000200 #define DAC_CRC_EN 0x00080000 @@ -700,7 +700,7 @@ #define DAC_CMP_EN (1 << 3) #define DAC_CMP_OUTPUT (1 << 7) -/* DAC_CNTL2 bit constants */ +/* DAC_CNTL2 bit constants */ #define DAC2_EXPAND_MODE (1 << 14) #define DAC2_CMP_EN (1 << 7) #define DAC2_PALETTE_ACCESS_CNTL (1 << 5) diff --git a/include/video/s1d13xxxfb.h b/include/video/s1d13xxxfb.h index fe41b8407946..c3b2a2aa7140 100644 --- a/include/video/s1d13xxxfb.h +++ b/include/video/s1d13xxxfb.h @@ -14,13 +14,16 @@ #define S1D13XXXFB_H #define S1D_PALETTE_SIZE 256 -#define S1D13506_CHIP_REV 4 /* expected chip revision number for s1d13506 */ -#define S1D13806_CHIP_REV 7 /* expected chip revision number for s1d13806 */ -#define S1D_FBID "S1D13806" -#define S1D_DEVICENAME "s1d13806fb" +#define S1D_FBID "S1D13xxx" +#define S1D_DEVICENAME "s1d13xxxfb" + +/* S1DREG_REV_CODE register = prod_id (6 bits) + revision (2 bits) */ +#define S1D13505_PROD_ID 0x3 /* 000011 */ +#define S1D13506_PROD_ID 0x4 /* 000100 */ +#define S1D13806_PROD_ID 0x7 /* 000111 */ /* register definitions (tested on s1d13896) */ -#define S1DREG_REV_CODE 0x0000 /* Revision Code Register */ +#define S1DREG_REV_CODE 0x0000 /* Prod + Rev Code Register */ #define S1DREG_MISC 0x0001 /* Miscellaneous Register */ #define S1DREG_GPIO_CNF0 0x0004 /* General IO Pins Configuration Register 0 */ #define S1DREG_GPIO_CNF1 0x0005 /* General IO Pins Configuration Register 1 */ @@ -141,10 +144,11 @@ struct s1d13xxxfb_regval { u8 value; }; - struct s1d13xxxfb_par { void __iomem *regs; unsigned char display; + unsigned char prod_id; + unsigned char revision; unsigned int pseudo_palette[16]; #ifdef CONFIG_PM diff --git a/init/main.c b/init/main.c index d6b388fbffa6..07c8658ffca5 100644 --- a/init/main.c +++ b/init/main.c @@ -793,6 +793,7 @@ static void run_init_process(char *init_filename) * makes it inline to init() and it becomes part of init.text section */ static noinline int init_post(void) + __releases(kernel_lock) { /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); diff --git a/kernel/power/disk.c b/kernel/power/disk.c index e886d1332a10..f3db382c2b2d 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -22,6 +22,7 @@ #include <linux/console.h> #include <linux/cpu.h> #include <linux/freezer.h> +#include <asm/suspend.h> #include "power.h" diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f5fc2d7680f2..33e2e4a819f9 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -321,13 +321,10 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) INIT_LIST_HEAD(list); - for_each_zone(zone) { + for_each_populated_zone(zone) { unsigned long zone_start, zone_end; struct mem_extent *ext, *cur, *aux; - if (!populated_zone(zone)) - continue; - zone_start = zone->zone_start_pfn; zone_end = zone->zone_start_pfn + zone->spanned_pages; @@ -804,8 +801,8 @@ static unsigned int count_free_highmem_pages(void) struct zone *zone; unsigned int cnt = 0; - for_each_zone(zone) - if (populated_zone(zone) && is_highmem(zone)) + for_each_populated_zone(zone) + if (is_highmem(zone)) cnt += zone_page_state(zone, NR_FREE_PAGES); return cnt; diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index a92c91451559..78c35047586d 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -51,6 +51,7 @@ #include <linux/highmem.h> #include <linux/time.h> #include <linux/rbtree.h> +#include <linux/io.h> #include "power.h" @@ -229,17 +230,16 @@ int swsusp_shrink_memory(void) size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES; tmp = size; size += highmem_size; - for_each_zone (zone) - if (populated_zone(zone)) { - tmp += snapshot_additional_pages(zone); - if (is_highmem(zone)) { - highmem_size -= + for_each_populated_zone(zone) { + tmp += snapshot_additional_pages(zone); + if (is_highmem(zone)) { + highmem_size -= zone_page_state(zone, NR_FREE_PAGES); - } else { - tmp -= zone_page_state(zone, NR_FREE_PAGES); - tmp += zone->lowmem_reserve[ZONE_NORMAL]; - } + } else { + tmp -= zone_page_state(zone, NR_FREE_PAGES); + tmp += zone->lowmem_reserve[ZONE_NORMAL]; } + } if (highmem_size < 0) highmem_size = 0; diff --git a/kernel/sched.c b/kernel/sched.c index 196d48babbef..73513f4e19df 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5196,11 +5196,17 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode) __wake_up_common(q, mode, 1, 0, NULL); } +void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) +{ + __wake_up_common(q, mode, 1, 0, key); +} + /** - * __wake_up_sync - wake up threads blocked on a waitqueue. + * __wake_up_sync_key - wake up threads blocked on a waitqueue. * @q: the waitqueue * @mode: which threads * @nr_exclusive: how many wake-one or wake-many threads to wake up + * @key: opaque value to be passed to wakeup targets * * The sync wakeup differs that the waker knows that it will schedule * away soon, so while the target thread will be woken up, it will not @@ -5209,8 +5215,8 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode) * * On UP it can prevent extra preemption. */ -void -__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) +void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, + int nr_exclusive, void *key) { unsigned long flags; int sync = 1; @@ -5222,9 +5228,18 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) sync = 0; spin_lock_irqsave(&q->lock, flags); - __wake_up_common(q, mode, nr_exclusive, sync, NULL); + __wake_up_common(q, mode, nr_exclusive, sync, key); spin_unlock_irqrestore(&q->lock, flags); } +EXPORT_SYMBOL_GPL(__wake_up_sync_key); + +/* + * __wake_up_sync - see __wake_up_sync_key() + */ +void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) +{ + __wake_up_sync_key(q, mode, nr_exclusive, NULL); +} EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ /** diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c5ef44ff850f..2e490a389dd2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1010,7 +1010,7 @@ static struct ctl_table vm_table[] = { .data = &dirty_expire_interval, .maxlen = sizeof(dirty_expire_interval), .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, + .proc_handler = &proc_dointvec, }, { .ctl_name = VM_NR_PDFLUSH_THREADS, diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 58bfe7e8faba..9638d99644af 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -796,6 +796,7 @@ config SYSCTL_SYSCALL_CHECK to properly maintain and use. This enables checks that help you to keep things correct. +source mm/Kconfig.debug source kernel/trace/Kconfig config PROVIDE_OHCI1394_DMA_INIT diff --git a/lib/rbtree.c b/lib/rbtree.c index 9956b99649f0..f653659e0bc1 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -163,17 +163,14 @@ static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, { if (!other->rb_right || rb_is_black(other->rb_right)) { - struct rb_node *o_left; - if ((o_left = other->rb_left)) - rb_set_black(o_left); + rb_set_black(other->rb_left); rb_set_red(other); __rb_rotate_right(other, root); other = parent->rb_right; } rb_set_color(other, rb_color(parent)); rb_set_black(parent); - if (other->rb_right) - rb_set_black(other->rb_right); + rb_set_black(other->rb_right); __rb_rotate_left(parent, root); node = root->rb_node; break; @@ -200,17 +197,14 @@ static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, { if (!other->rb_left || rb_is_black(other->rb_left)) { - register struct rb_node *o_right; - if ((o_right = other->rb_right)) - rb_set_black(o_right); + rb_set_black(other->rb_right); rb_set_red(other); __rb_rotate_left(other, root); other = parent->rb_left; } rb_set_color(other, rb_color(parent)); rb_set_black(parent); - if (other->rb_left) - rb_set_black(other->rb_left); + rb_set_black(other->rb_left); __rb_rotate_right(parent, root); node = root->rb_node; break; diff --git a/mm/Kconfig b/mm/Kconfig index a5b77811fdf2..b53427ad30a3 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -206,7 +206,6 @@ config VIRT_TO_BUS config UNEVICTABLE_LRU bool "Add LRU list to track non-evictable pages" default y - depends on MMU help Keeps unevictable pages off of the active and inactive pageout lists, so kswapd will not waste CPU time or have its balancing @@ -214,5 +213,13 @@ config UNEVICTABLE_LRU will use one page flag and increase the code size a little, say Y unless you know what you are doing. +config HAVE_MLOCK + bool + default y if MMU=y + +config HAVE_MLOCKED_PAGE_BIT + bool + default y if HAVE_MLOCK=y && UNEVICTABLE_LRU=y + config MMU_NOTIFIER bool diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug new file mode 100644 index 000000000000..c8d62d49a44e --- /dev/null +++ b/mm/Kconfig.debug @@ -0,0 +1,17 @@ +config WANT_PAGE_DEBUG_FLAGS + bool + +config PAGE_POISONING + bool "Debug page memory allocations" + depends on DEBUG_KERNEL && !ARCH_SUPPORTS_DEBUG_PAGEALLOC + depends on !HIBERNATION + select DEBUG_PAGEALLOC + select WANT_PAGE_DEBUG_FLAGS + help + Fill the pages with poison patterns after free_pages() and verify + the patterns before alloc_pages(). This results in a large slowdown, + but helps to find certain types of memory corruptions. + + This option cannot enalbe with hibernation. Otherwise, it will get + wrong messages for memory corruption because the free pages are not + saved to the suspend image. diff --git a/mm/Makefile b/mm/Makefile index 818569b68f46..ec73c68b6015 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o +obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_FAILSLAB) += failslab.o diff --git a/mm/debug-pagealloc.c b/mm/debug-pagealloc.c new file mode 100644 index 000000000000..a1e3324de2b5 --- /dev/null +++ b/mm/debug-pagealloc.c @@ -0,0 +1,129 @@ +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/page-debug-flags.h> +#include <linux/poison.h> + +static inline void set_page_poison(struct page *page) +{ + __set_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags); +} + +static inline void clear_page_poison(struct page *page) +{ + __clear_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags); +} + +static inline bool page_poison(struct page *page) +{ + return test_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags); +} + +static void poison_highpage(struct page *page) +{ + /* + * Page poisoning for highmem pages is not implemented. + * + * This can be called from interrupt contexts. + * So we need to create a new kmap_atomic slot for this + * application and it will need interrupt protection. + */ +} + +static void poison_page(struct page *page) +{ + void *addr; + + if (PageHighMem(page)) { + poison_highpage(page); + return; + } + set_page_poison(page); + addr = page_address(page); + memset(addr, PAGE_POISON, PAGE_SIZE); +} + +static void poison_pages(struct page *page, int n) +{ + int i; + + for (i = 0; i < n; i++) + poison_page(page + i); +} + +static bool single_bit_flip(unsigned char a, unsigned char b) +{ + unsigned char error = a ^ b; + + return error && !(error & (error - 1)); +} + +static void check_poison_mem(unsigned char *mem, size_t bytes) +{ + unsigned char *start; + unsigned char *end; + + for (start = mem; start < mem + bytes; start++) { + if (*start != PAGE_POISON) + break; + } + if (start == mem + bytes) + return; + + for (end = mem + bytes - 1; end > start; end--) { + if (*end != PAGE_POISON) + break; + } + + if (!printk_ratelimit()) + return; + else if (start == end && single_bit_flip(*start, PAGE_POISON)) + printk(KERN_ERR "pagealloc: single bit error\n"); + else + printk(KERN_ERR "pagealloc: memory corruption\n"); + + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, start, + end - start + 1, 1); + dump_stack(); +} + +static void unpoison_highpage(struct page *page) +{ + /* + * See comment in poison_highpage(). + * Highmem pages should not be poisoned for now + */ + BUG_ON(page_poison(page)); +} + +static void unpoison_page(struct page *page) +{ + if (PageHighMem(page)) { + unpoison_highpage(page); + return; + } + if (page_poison(page)) { + void *addr = page_address(page); + + check_poison_mem(addr, PAGE_SIZE); + clear_page_poison(page); + } +} + +static void unpoison_pages(struct page *page, int n) +{ + int i; + + for (i = 0; i < n; i++) + unpoison_page(page + i); +} + +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + if (!debug_pagealloc_enabled) + return; + + if (enable) + unpoison_pages(page, numpages); + else + poison_pages(page, numpages); +} diff --git a/mm/highmem.c b/mm/highmem.c index 910198037bf5..68eb1d9b63fa 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -422,3 +422,48 @@ void __init page_address_init(void) } #endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */ + +#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT) + +void debug_kmap_atomic(enum km_type type) +{ + static unsigned warn_count = 10; + + if (unlikely(warn_count == 0)) + return; + + if (unlikely(in_interrupt())) { + if (in_irq()) { + if (type != KM_IRQ0 && type != KM_IRQ1 && + type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ && + type != KM_BOUNCE_READ) { + WARN_ON(1); + warn_count--; + } + } else if (!irqs_disabled()) { /* softirq */ + if (type != KM_IRQ0 && type != KM_IRQ1 && + type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 && + type != KM_SKB_SUNRPC_DATA && + type != KM_SKB_DATA_SOFTIRQ && + type != KM_BOUNCE_READ) { + WARN_ON(1); + warn_count--; + } + } + } + + if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ || + type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) { + if (!irqs_disabled()) { + WARN_ON(1); + warn_count--; + } + } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) { + if (irq_count() == 0 && !irqs_disabled()) { + WARN_ON(1); + warn_count--; + } + } +} + +#endif diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 107da3d809a8..28c655ba9353 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -918,7 +918,7 @@ static void return_unused_surplus_pages(struct hstate *h, * an instantiated the change should be committed via vma_commit_reservation. * No action is required on failure. */ -static int vma_needs_reservation(struct hstate *h, +static long vma_needs_reservation(struct hstate *h, struct vm_area_struct *vma, unsigned long addr) { struct address_space *mapping = vma->vm_file->f_mapping; @@ -933,7 +933,7 @@ static int vma_needs_reservation(struct hstate *h, return 1; } else { - int err; + long err; pgoff_t idx = vma_hugecache_offset(h, vma, addr); struct resv_map *reservations = vma_resv_map(vma); @@ -969,7 +969,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, struct page *page; struct address_space *mapping = vma->vm_file->f_mapping; struct inode *inode = mapping->host; - unsigned int chg; + long chg; /* * Processes that did not create the mapping will have no reserves and diff --git a/mm/internal.h b/mm/internal.h index 478223b73a2a..987bb03fbdd8 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -63,6 +63,7 @@ static inline unsigned long page_order(struct page *page) return page_private(page); } +#ifdef CONFIG_HAVE_MLOCK extern long mlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void munlock_vma_pages_range(struct vm_area_struct *vma, @@ -71,6 +72,7 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma) { munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end); } +#endif #ifdef CONFIG_UNEVICTABLE_LRU /* @@ -90,7 +92,7 @@ static inline void unevictable_migrate_page(struct page *new, struct page *old) } #endif -#ifdef CONFIG_UNEVICTABLE_LRU +#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT /* * Called only in fault path via page_evictable() for a new page * to determine if it's being mapped into a LOCKED vma. @@ -165,7 +167,7 @@ static inline void free_page_mlock(struct page *page) } } -#else /* CONFIG_UNEVICTABLE_LRU */ +#else /* CONFIG_HAVE_MLOCKED_PAGE_BIT */ static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p) { return 0; @@ -175,7 +177,7 @@ static inline void mlock_vma_page(struct page *page) { } static inline void mlock_migrate_page(struct page *new, struct page *old) { } static inline void free_page_mlock(struct page *page) { } -#endif /* CONFIG_UNEVICTABLE_LRU */ +#endif /* CONFIG_HAVE_MLOCKED_PAGE_BIT */ /* * Return the mem_map entry representing the 'offset' subpage within diff --git a/mm/memory.c b/mm/memory.c index 2032ad2fc34b..cf6873e91c6a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1151,6 +1151,11 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, if ((flags & FOLL_WRITE) && !pte_dirty(pte) && !PageDirty(page)) set_page_dirty(page); + /* + * pte_mkyoung() would be more correct here, but atomic care + * is needed to avoid losing the dirty bit: it is easier to use + * mark_page_accessed(). + */ mark_page_accessed(page); } unlock: @@ -1940,6 +1945,15 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, * get_user_pages(.write=1, .force=1). */ if (vma->vm_ops && vma->vm_ops->page_mkwrite) { + struct vm_fault vmf; + int tmp; + + vmf.virtual_address = (void __user *)(address & + PAGE_MASK); + vmf.pgoff = old_page->index; + vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; + vmf.page = old_page; + /* * Notify the address space that the page is about to * become writable so that it can prohibit this or wait @@ -1951,8 +1965,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, page_cache_get(old_page); pte_unmap_unlock(page_table, ptl); - if (vma->vm_ops->page_mkwrite(vma, old_page) < 0) + tmp = vma->vm_ops->page_mkwrite(vma, &vmf); + if (unlikely(tmp & + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { + ret = tmp; goto unwritable_page; + } /* * Since we dropped the lock we need to revalidate @@ -2101,7 +2119,7 @@ oom: unwritable_page: page_cache_release(old_page); - return VM_FAULT_SIGBUS; + return ret; } /* @@ -2435,8 +2453,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, count_vm_event(PGMAJFAULT); } - mark_page_accessed(page); - lock_page(page); delayacct_clear_flag(DELAYACCT_PF_SWAPIN); @@ -2645,9 +2661,14 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, * to become writable */ if (vma->vm_ops->page_mkwrite) { + int tmp; + unlock_page(page); - if (vma->vm_ops->page_mkwrite(vma, page) < 0) { - ret = VM_FAULT_SIGBUS; + vmf.flags |= FAULT_FLAG_MKWRITE; + tmp = vma->vm_ops->page_mkwrite(vma, &vmf); + if (unlikely(tmp & + (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { + ret = tmp; anon = 1; /* no anon but release vmf.page */ goto out_unlocked; } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 40ba05061a4f..d3b9bac085b5 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -55,7 +55,7 @@ static DEFINE_SPINLOCK(zone_scan_lock); unsigned long badness(struct task_struct *p, unsigned long uptime) { - unsigned long points, cpu_time, run_time, s; + unsigned long points, cpu_time, run_time; struct mm_struct *mm; struct task_struct *child; @@ -110,12 +110,10 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) else run_time = 0; - s = int_sqrt(cpu_time); - if (s) - points /= s; - s = int_sqrt(int_sqrt(run_time)); - if (s) - points /= s; + if (cpu_time) + points /= int_sqrt(cpu_time); + if (run_time) + points /= int_sqrt(int_sqrt(run_time)); /* * Niced processes are most likely less important, so double diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 40ca7cdb653e..30351f0063ac 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -92,14 +92,14 @@ int vm_dirty_ratio = 20; unsigned long vm_dirty_bytes; /* - * The interval between `kupdate'-style writebacks, in jiffies + * The interval between `kupdate'-style writebacks */ -int dirty_writeback_interval = 5 * HZ; +unsigned int dirty_writeback_interval = 5 * 100; /* sentiseconds */ /* - * The longest number of jiffies for which data is allowed to remain dirty + * The longest time for which data is allowed to remain dirty */ -int dirty_expire_interval = 30 * HZ; +unsigned int dirty_expire_interval = 30 * 100; /* sentiseconds */ /* * Flag that makes the machine dump writes/reads and block dirtyings. @@ -770,9 +770,9 @@ static void wb_kupdate(unsigned long arg) sync_supers(); - oldest_jif = jiffies - dirty_expire_interval; + oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval); start_jif = jiffies; - next_jif = start_jif + dirty_writeback_interval; + next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10); nr_to_write = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS) + (inodes_stat.nr_inodes - inodes_stat.nr_unused); @@ -801,9 +801,10 @@ static void wb_kupdate(unsigned long arg) int dirty_writeback_centisecs_handler(ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec_userhz_jiffies(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, file, buffer, length, ppos); if (dirty_writeback_interval) - mod_timer(&wb_timer, jiffies + dirty_writeback_interval); + mod_timer(&wb_timer, jiffies + + msecs_to_jiffies(dirty_writeback_interval * 10)); else del_timer(&wb_timer); return 0; @@ -905,7 +906,8 @@ void __init page_writeback_init(void) { int shift; - mod_timer(&wb_timer, jiffies + dirty_writeback_interval); + mod_timer(&wb_timer, + jiffies + msecs_to_jiffies(dirty_writeback_interval * 10)); writeback_set_ratelimit(); register_cpu_notifier(&ratelimit_nb); @@ -1198,6 +1200,20 @@ int __set_page_dirty_no_writeback(struct page *page) } /* + * Helper function for set_page_dirty family. + * NOTE: This relies on being atomic wrt interrupts. + */ +void account_page_dirtied(struct page *page, struct address_space *mapping) +{ + if (mapping_cap_account_dirty(mapping)) { + __inc_zone_page_state(page, NR_FILE_DIRTY); + __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); + task_dirty_inc(current); + task_io_account_write(PAGE_CACHE_SIZE); + } +} + +/* * For address_spaces which do not use buffers. Just tag the page as dirty in * its radix tree. * @@ -1226,13 +1242,7 @@ int __set_page_dirty_nobuffers(struct page *page) if (mapping2) { /* Race with truncate? */ BUG_ON(mapping2 != mapping); WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); - if (mapping_cap_account_dirty(mapping)) { - __inc_zone_page_state(page, NR_FILE_DIRTY); - __inc_bdi_stat(mapping->backing_dev_info, - BDI_RECLAIMABLE); - task_dirty_inc(current); - task_io_account_write(PAGE_CACHE_SIZE); - } + account_page_dirtied(page, mapping); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a3803ea8c27d..0284e528748d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -922,13 +922,10 @@ static void drain_pages(unsigned int cpu) unsigned long flags; struct zone *zone; - for_each_zone(zone) { + for_each_populated_zone(zone) { struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; - if (!populated_zone(zone)) - continue; - pset = zone_pcp(zone, cpu); pcp = &pset->pcp; @@ -1585,7 +1582,8 @@ nofail_alloc: reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; - did_some_progress = try_to_free_pages(zonelist, order, gfp_mask); + did_some_progress = try_to_free_pages(zonelist, order, + gfp_mask, nodemask); p->reclaim_state = NULL; lockdep_clear_current_reclaim_state(); @@ -1879,10 +1877,7 @@ void show_free_areas(void) int cpu; struct zone *zone; - for_each_zone(zone) { - if (!populated_zone(zone)) - continue; - + for_each_populated_zone(zone) { show_node(zone); printk("%s per-cpu:\n", zone->name); @@ -1922,12 +1917,9 @@ void show_free_areas(void) global_page_state(NR_PAGETABLE), global_page_state(NR_BOUNCE)); - for_each_zone(zone) { + for_each_populated_zone(zone) { int i; - if (!populated_zone(zone)) - continue; - show_node(zone); printk("%s" " free:%lukB" @@ -1967,12 +1959,9 @@ void show_free_areas(void) printk("\n"); } - for_each_zone(zone) { + for_each_populated_zone(zone) { unsigned long nr[MAX_ORDER], flags, order, total = 0; - if (!populated_zone(zone)) - continue; - show_node(zone); printk("%s: ", zone->name); @@ -2784,11 +2773,7 @@ static int __cpuinit process_zones(int cpu) node_set_state(node, N_CPU); /* this node has a cpu */ - for_each_zone(zone) { - - if (!populated_zone(zone)) - continue; - + for_each_populated_zone(zone) { zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), GFP_KERNEL, node); if (!zone_pcp(zone, cpu)) diff --git a/mm/shmem.c b/mm/shmem.c index 7ec78e24a30d..d94d2e9146bc 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1068,8 +1068,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) swap_duplicate(swap); BUG_ON(page_mapped(page)); page_cache_release(page); /* pagecache ref */ - set_page_dirty(page); - unlock_page(page); + swap_writepage(page, wbc); if (inode) { mutex_lock(&shmem_swaplist_mutex); /* move instead of add in case we're racing */ diff --git a/mm/sparse.c b/mm/sparse.c index 083f5b63e7a8..da432d9f0ae8 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -164,9 +164,7 @@ void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn, WARN_ON_ONCE(1); *start_pfn = max_sparsemem_pfn; *end_pfn = max_sparsemem_pfn; - } - - if (*end_pfn > max_sparsemem_pfn) { + } else if (*end_pfn > max_sparsemem_pfn) { mminit_dprintk(MMINIT_WARNING, "pfnvalidation", "End of range %lu -> %lu exceeds SPARSEMEM max %lu\n", *start_pfn, *end_pfn, max_sparsemem_pfn); diff --git a/mm/swap.c b/mm/swap.c index 8adb9feb61e1..6e83084c1f6c 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -457,29 +457,6 @@ void pagevec_strip(struct pagevec *pvec) } /** - * pagevec_swap_free - try to free swap space from the pages in a pagevec - * @pvec: pagevec with swapcache pages to free the swap space of - * - * The caller needs to hold an extra reference to each page and - * not hold the page lock on the pages. This function uses a - * trylock on the page lock so it may not always free the swap - * space associated with a page. - */ -void pagevec_swap_free(struct pagevec *pvec) -{ - int i; - - for (i = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; - - if (PageSwapCache(page) && trylock_page(page)) { - try_to_free_swap(page); - unlock_page(page); - } - } -} - -/** * pagevec_lookup - gang pagecache lookup * @pvec: Where the resulting pages are placed * @mapping: The address_space to search diff --git a/mm/util.c b/mm/util.c index 37eaccdf3054..7c122e49f769 100644 --- a/mm/util.c +++ b/mm/util.c @@ -70,6 +70,36 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp) EXPORT_SYMBOL(kmemdup); /** + * memdup_user - duplicate memory region from user space + * + * @src: source address in user space + * @len: number of bytes to copy + * + * Returns an ERR_PTR() on failure. + */ +void *memdup_user(const void __user *src, size_t len) +{ + void *p; + + /* + * Always use GFP_KERNEL, since copy_from_user() can sleep and + * cause pagefault, which makes it pointless to use GFP_NOFS + * or GFP_ATOMIC. + */ + p = kmalloc_track_caller(len, GFP_KERNEL); + if (!p) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(p, src, len)) { + kfree(p); + return ERR_PTR(-EFAULT); + } + + return p; +} +EXPORT_SYMBOL(memdup_user); + +/** * __krealloc - like krealloc() but don't free @p. * @p: object to reallocate memory for. * @new_size: how many bytes of memory are required. diff --git a/mm/vmalloc.c b/mm/vmalloc.c index af58324c361a..fab19876b4d1 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -671,10 +671,7 @@ struct vmap_block { DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); union { - struct { - struct list_head free_list; - struct list_head dirty_list; - }; + struct list_head free_list; struct rcu_head rcu_head; }; }; @@ -741,7 +738,6 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask) bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS); bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS); INIT_LIST_HEAD(&vb->free_list); - INIT_LIST_HEAD(&vb->dirty_list); vb_idx = addr_to_vb_idx(va->va_start); spin_lock(&vmap_block_tree_lock); @@ -772,12 +768,7 @@ static void free_vmap_block(struct vmap_block *vb) struct vmap_block *tmp; unsigned long vb_idx; - spin_lock(&vb->vbq->lock); - if (!list_empty(&vb->free_list)) - list_del(&vb->free_list); - if (!list_empty(&vb->dirty_list)) - list_del(&vb->dirty_list); - spin_unlock(&vb->vbq->lock); + BUG_ON(!list_empty(&vb->free_list)); vb_idx = addr_to_vb_idx(vb->va->va_start); spin_lock(&vmap_block_tree_lock); @@ -862,11 +853,7 @@ static void vb_free(const void *addr, unsigned long size) spin_lock(&vb->lock); bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order); - if (!vb->dirty) { - spin_lock(&vb->vbq->lock); - list_add(&vb->dirty_list, &vb->vbq->dirty); - spin_unlock(&vb->vbq->lock); - } + vb->dirty += 1UL << order; if (vb->dirty == VMAP_BBMAP_BITS) { BUG_ON(vb->free || !list_empty(&vb->free_list)); diff --git a/mm/vmscan.c b/mm/vmscan.c index 479e46719394..06e72693b458 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -60,8 +60,8 @@ struct scan_control { int may_writepage; - /* Can pages be swapped as part of reclaim? */ - int may_swap; + /* Can mapped pages be reclaimed? */ + int may_unmap; /* This context's SWAP_CLUSTER_MAX. If freeing memory for * suspend, we effectively ignore SWAP_CLUSTER_MAX. @@ -78,6 +78,12 @@ struct scan_control { /* Which cgroup do we reclaim from */ struct mem_cgroup *mem_cgroup; + /* + * Nodemask of nodes allowed by the caller. If NULL, all nodes + * are scanned. + */ + nodemask_t *nodemask; + /* Pluggable isolate pages callback */ unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst, unsigned long *scanned, int order, int mode, @@ -214,8 +220,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, do_div(delta, lru_pages + 1); shrinker->nr += delta; if (shrinker->nr < 0) { - printk(KERN_ERR "%s: nr=%ld\n", - __func__, shrinker->nr); + printk(KERN_ERR "shrink_slab: %pF negative objects to " + "delete nr=%ld\n", + shrinker->shrink, shrinker->nr); shrinker->nr = max_pass; } @@ -606,7 +613,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (unlikely(!page_evictable(page, NULL))) goto cull_mlocked; - if (!sc->may_swap && page_mapped(page)) + if (!sc->may_unmap && page_mapped(page)) goto keep_locked; /* Double the slab pressure for mapped and swapcache pages */ @@ -1298,17 +1305,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, } __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); pgdeactivate += pgmoved; - if (buffer_heads_over_limit) { - spin_unlock_irq(&zone->lru_lock); - pagevec_strip(&pvec); - spin_lock_irq(&zone->lru_lock); - } __count_zone_vm_events(PGREFILL, zone, pgscanned); __count_vm_events(PGDEACTIVATE, pgdeactivate); spin_unlock_irq(&zone->lru_lock); - if (vm_swap_full()) - pagevec_swap_free(&pvec); - + if (buffer_heads_over_limit) + pagevec_strip(&pvec); pagevec_release(&pvec); } @@ -1543,7 +1544,8 @@ static void shrink_zones(int priority, struct zonelist *zonelist, struct zone *zone; sc->all_unreclaimable = 1; - for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { + for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, + sc->nodemask) { if (!populated_zone(zone)) continue; /* @@ -1688,17 +1690,18 @@ out: } unsigned long try_to_free_pages(struct zonelist *zonelist, int order, - gfp_t gfp_mask) + gfp_t gfp_mask, nodemask_t *nodemask) { struct scan_control sc = { .gfp_mask = gfp_mask, .may_writepage = !laptop_mode, .swap_cluster_max = SWAP_CLUSTER_MAX, - .may_swap = 1, + .may_unmap = 1, .swappiness = vm_swappiness, .order = order, .mem_cgroup = NULL, .isolate_pages = isolate_pages_global, + .nodemask = nodemask, }; return do_try_to_free_pages(zonelist, &sc); @@ -1713,17 +1716,18 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, { struct scan_control sc = { .may_writepage = !laptop_mode, - .may_swap = 1, + .may_unmap = 1, .swap_cluster_max = SWAP_CLUSTER_MAX, .swappiness = swappiness, .order = 0, .mem_cgroup = mem_cont, .isolate_pages = mem_cgroup_isolate_pages, + .nodemask = NULL, /* we don't care the placement */ }; struct zonelist *zonelist; if (noswap) - sc.may_swap = 0; + sc.may_unmap = 0; sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); @@ -1762,7 +1766,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) struct reclaim_state *reclaim_state = current->reclaim_state; struct scan_control sc = { .gfp_mask = GFP_KERNEL, - .may_swap = 1, + .may_unmap = 1, .swap_cluster_max = SWAP_CLUSTER_MAX, .swappiness = vm_swappiness, .order = order, @@ -2050,22 +2054,19 @@ unsigned long global_lru_pages(void) #ifdef CONFIG_PM /* * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages - * from LRU lists system-wide, for given pass and priority, and returns the - * number of reclaimed pages + * from LRU lists system-wide, for given pass and priority. * * For pass > 3 we also try to shrink the LRU lists that contain a few pages */ -static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, +static void shrink_all_zones(unsigned long nr_pages, int prio, int pass, struct scan_control *sc) { struct zone *zone; - unsigned long ret = 0; + unsigned long nr_reclaimed = 0; - for_each_zone(zone) { + for_each_populated_zone(zone) { enum lru_list l; - if (!populated_zone(zone)) - continue; if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY) continue; @@ -2084,14 +2085,16 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, zone->lru[l].nr_scan = 0; nr_to_scan = min(nr_pages, lru_pages); - ret += shrink_list(l, nr_to_scan, zone, + nr_reclaimed += shrink_list(l, nr_to_scan, zone, sc, prio); - if (ret >= nr_pages) - return ret; + if (nr_reclaimed >= nr_pages) { + sc->nr_reclaimed = nr_reclaimed; + return; + } } } } - return ret; + sc->nr_reclaimed = nr_reclaimed; } /* @@ -2105,13 +2108,11 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, unsigned long shrink_all_memory(unsigned long nr_pages) { unsigned long lru_pages, nr_slab; - unsigned long ret = 0; int pass; struct reclaim_state reclaim_state; struct scan_control sc = { .gfp_mask = GFP_KERNEL, - .may_swap = 0, - .swap_cluster_max = nr_pages, + .may_unmap = 0, .may_writepage = 1, .isolate_pages = isolate_pages_global, }; @@ -2127,8 +2128,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages) if (!reclaim_state.reclaimed_slab) break; - ret += reclaim_state.reclaimed_slab; - if (ret >= nr_pages) + sc.nr_reclaimed += reclaim_state.reclaimed_slab; + if (sc.nr_reclaimed >= nr_pages) goto out; nr_slab -= reclaim_state.reclaimed_slab; @@ -2147,21 +2148,22 @@ unsigned long shrink_all_memory(unsigned long nr_pages) /* Force reclaiming mapped pages in the passes #3 and #4 */ if (pass > 2) - sc.may_swap = 1; + sc.may_unmap = 1; for (prio = DEF_PRIORITY; prio >= 0; prio--) { - unsigned long nr_to_scan = nr_pages - ret; + unsigned long nr_to_scan = nr_pages - sc.nr_reclaimed; sc.nr_scanned = 0; - ret += shrink_all_zones(nr_to_scan, prio, pass, &sc); - if (ret >= nr_pages) + sc.swap_cluster_max = nr_to_scan; + shrink_all_zones(nr_to_scan, prio, pass, &sc); + if (sc.nr_reclaimed >= nr_pages) goto out; reclaim_state.reclaimed_slab = 0; shrink_slab(sc.nr_scanned, sc.gfp_mask, global_lru_pages()); - ret += reclaim_state.reclaimed_slab; - if (ret >= nr_pages) + sc.nr_reclaimed += reclaim_state.reclaimed_slab; + if (sc.nr_reclaimed >= nr_pages) goto out; if (sc.nr_scanned && prio < DEF_PRIORITY - 2) @@ -2170,21 +2172,23 @@ unsigned long shrink_all_memory(unsigned long nr_pages) } /* - * If ret = 0, we could not shrink LRUs, but there may be something - * in slab caches + * If sc.nr_reclaimed = 0, we could not shrink LRUs, but there may be + * something in slab caches */ - if (!ret) { + if (!sc.nr_reclaimed) { do { reclaim_state.reclaimed_slab = 0; shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages()); - ret += reclaim_state.reclaimed_slab; - } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0); + sc.nr_reclaimed += reclaim_state.reclaimed_slab; + } while (sc.nr_reclaimed < nr_pages && + reclaim_state.reclaimed_slab > 0); } + out: current->reclaim_state = NULL; - return ret; + return sc.nr_reclaimed; } #endif @@ -2290,11 +2294,12 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) int priority; struct scan_control sc = { .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), - .may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP), + .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), .swap_cluster_max = max_t(unsigned long, nr_pages, SWAP_CLUSTER_MAX), .gfp_mask = gfp_mask, .swappiness = vm_swappiness, + .order = order, .isolate_pages = isolate_pages_global, }; unsigned long slab_reclaimable; diff --git a/mm/vmstat.c b/mm/vmstat.c index 8cd81ea1ddc1..9826766f1274 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -135,11 +135,7 @@ static void refresh_zone_stat_thresholds(void) int cpu; int threshold; - for_each_zone(zone) { - - if (!zone->present_pages) - continue; - + for_each_populated_zone(zone) { threshold = calculate_threshold(zone); for_each_online_cpu(cpu) @@ -301,12 +297,9 @@ void refresh_cpu_vm_stats(int cpu) int i; int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; - for_each_zone(zone) { + for_each_populated_zone(zone) { struct per_cpu_pageset *p; - if (!populated_zone(zone)) - continue; - p = zone_pcp(zone, cpu); for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index abdc703a11d2..cab71ea2796d 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -1093,11 +1093,6 @@ static void rfcomm_tty_hangup(struct tty_struct *tty) } } -static int rfcomm_tty_read_proc(char *buf, char **start, off_t offset, int len, int *eof, void *unused) -{ - return 0; -} - static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp) { struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; @@ -1156,7 +1151,6 @@ static const struct tty_operations rfcomm_ops = { .send_xchar = rfcomm_tty_send_xchar, .hangup = rfcomm_tty_hangup, .wait_until_sent = rfcomm_tty_wait_until_sent, - .read_proc = rfcomm_tty_read_proc, .tiocmget = rfcomm_tty_tiocmget, .tiocmset = rfcomm_tty_tiocmset, }; diff --git a/net/core/sock.c b/net/core/sock.c index 0620046e4eba..7dbf3ffb35cc 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1677,7 +1677,7 @@ static void sock_def_error_report(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible_poll(sk->sk_sleep, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); read_unlock(&sk->sk_callback_lock); } @@ -1686,7 +1686,8 @@ static void sock_def_readable(struct sock *sk, int len) { read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_sync(sk->sk_sleep); + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | + POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); read_unlock(&sk->sk_callback_lock); } @@ -1700,7 +1701,8 @@ static void sock_def_write_space(struct sock *sk) */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_sync(sk->sk_sleep); + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | + POLLWRNORM | POLLWRBAND); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 086d5ef098fd..811984d9324b 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -34,6 +34,7 @@ #include <linux/module.h> #include <linux/fs.h> #include <linux/sched.h> +#include <linux/seq_file.h> #include <linux/termios.h> #include <linux/tty.h> #include <linux/interrupt.h> @@ -72,8 +73,7 @@ static int ircomm_tty_control_indication(void *instance, void *sap, static void ircomm_tty_flow_indication(void *instance, void *sap, LOCAL_FLOW cmd); #ifdef CONFIG_PROC_FS -static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len, - int *eof, void *unused); +static const struct file_operations ircomm_tty_proc_fops; #endif /* CONFIG_PROC_FS */ static struct tty_driver *driver; @@ -98,7 +98,7 @@ static const struct tty_operations ops = { .hangup = ircomm_tty_hangup, .wait_until_sent = ircomm_tty_wait_until_sent, #ifdef CONFIG_PROC_FS - .read_proc = ircomm_tty_read_proc, + .proc_fops = &ircomm_tty_proc_fops, #endif /* CONFIG_PROC_FS */ }; @@ -1245,150 +1245,170 @@ static void ircomm_tty_flow_indication(void *instance, void *sap, } #ifdef CONFIG_PROC_FS -static int ircomm_tty_line_info(struct ircomm_tty_cb *self, char *buf) +static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m) { - int ret=0; + char sep; - ret += sprintf(buf+ret, "State: %s\n", ircomm_tty_state[self->state]); + seq_printf(m, "State: %s\n", ircomm_tty_state[self->state]); - ret += sprintf(buf+ret, "Service type: "); + seq_puts(m, "Service type: "); if (self->service_type & IRCOMM_9_WIRE) - ret += sprintf(buf+ret, "9_WIRE"); + seq_puts(m, "9_WIRE"); else if (self->service_type & IRCOMM_3_WIRE) - ret += sprintf(buf+ret, "3_WIRE"); + seq_puts(m, "3_WIRE"); else if (self->service_type & IRCOMM_3_WIRE_RAW) - ret += sprintf(buf+ret, "3_WIRE_RAW"); + seq_puts(m, "3_WIRE_RAW"); else - ret += sprintf(buf+ret, "No common service type!\n"); - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "Port name: %s\n", self->settings.port_name); - - ret += sprintf(buf+ret, "DTE status: "); - if (self->settings.dte & IRCOMM_RTS) - ret += sprintf(buf+ret, "RTS|"); - if (self->settings.dte & IRCOMM_DTR) - ret += sprintf(buf+ret, "DTR|"); - if (self->settings.dte) - ret--; /* remove the last | */ - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "DCE status: "); - if (self->settings.dce & IRCOMM_CTS) - ret += sprintf(buf+ret, "CTS|"); - if (self->settings.dce & IRCOMM_DSR) - ret += sprintf(buf+ret, "DSR|"); - if (self->settings.dce & IRCOMM_CD) - ret += sprintf(buf+ret, "CD|"); - if (self->settings.dce & IRCOMM_RI) - ret += sprintf(buf+ret, "RI|"); - if (self->settings.dce) - ret--; /* remove the last | */ - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "Configuration: "); + seq_puts(m, "No common service type!\n"); + seq_putc(m, '\n'); + + seq_printf(m, "Port name: %s\n", self->settings.port_name); + + seq_printf(m, "DTE status:"); + sep = ' '; + if (self->settings.dte & IRCOMM_RTS) { + seq_printf(m, "%cRTS", sep); + sep = '|'; + } + if (self->settings.dte & IRCOMM_DTR) { + seq_printf(m, "%cDTR", sep); + sep = '|'; + } + seq_putc(m, '\n'); + + seq_puts(m, "DCE status:"); + sep = ' '; + if (self->settings.dce & IRCOMM_CTS) { + seq_printf(m, "%cCTS", sep); + sep = '|'; + } + if (self->settings.dce & IRCOMM_DSR) { + seq_printf(m, "%cDSR", sep); + sep = '|'; + } + if (self->settings.dce & IRCOMM_CD) { + seq_printf(m, "%cCD", sep); + sep = '|'; + } + if (self->settings.dce & IRCOMM_RI) { + seq_printf(m, "%cRI", sep); + sep = '|'; + } + seq_putc(m, '\n'); + + seq_puts(m, "Configuration: "); if (!self->settings.null_modem) - ret += sprintf(buf+ret, "DTE <-> DCE\n"); + seq_puts(m, "DTE <-> DCE\n"); else - ret += sprintf(buf+ret, - "DTE <-> DTE (null modem emulation)\n"); - - ret += sprintf(buf+ret, "Data rate: %d\n", self->settings.data_rate); - - ret += sprintf(buf+ret, "Flow control: "); - if (self->settings.flow_control & IRCOMM_XON_XOFF_IN) - ret += sprintf(buf+ret, "XON_XOFF_IN|"); - if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT) - ret += sprintf(buf+ret, "XON_XOFF_OUT|"); - if (self->settings.flow_control & IRCOMM_RTS_CTS_IN) - ret += sprintf(buf+ret, "RTS_CTS_IN|"); - if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT) - ret += sprintf(buf+ret, "RTS_CTS_OUT|"); - if (self->settings.flow_control & IRCOMM_DSR_DTR_IN) - ret += sprintf(buf+ret, "DSR_DTR_IN|"); - if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT) - ret += sprintf(buf+ret, "DSR_DTR_OUT|"); - if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN) - ret += sprintf(buf+ret, "ENQ_ACK_IN|"); - if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT) - ret += sprintf(buf+ret, "ENQ_ACK_OUT|"); - if (self->settings.flow_control) - ret--; /* remove the last | */ - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "Flags: "); - if (self->flags & ASYNC_CTS_FLOW) - ret += sprintf(buf+ret, "ASYNC_CTS_FLOW|"); - if (self->flags & ASYNC_CHECK_CD) - ret += sprintf(buf+ret, "ASYNC_CHECK_CD|"); - if (self->flags & ASYNC_INITIALIZED) - ret += sprintf(buf+ret, "ASYNC_INITIALIZED|"); - if (self->flags & ASYNC_LOW_LATENCY) - ret += sprintf(buf+ret, "ASYNC_LOW_LATENCY|"); - if (self->flags & ASYNC_CLOSING) - ret += sprintf(buf+ret, "ASYNC_CLOSING|"); - if (self->flags & ASYNC_NORMAL_ACTIVE) - ret += sprintf(buf+ret, "ASYNC_NORMAL_ACTIVE|"); - if (self->flags) - ret--; /* remove the last | */ - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "Role: %s\n", self->client ? - "client" : "server"); - ret += sprintf(buf+ret, "Open count: %d\n", self->open_count); - ret += sprintf(buf+ret, "Max data size: %d\n", self->max_data_size); - ret += sprintf(buf+ret, "Max header size: %d\n", self->max_header_size); + seq_puts(m, "DTE <-> DTE (null modem emulation)\n"); + + seq_printf(m, "Data rate: %d\n", self->settings.data_rate); + + seq_puts(m, "Flow control:"); + sep = ' '; + if (self->settings.flow_control & IRCOMM_XON_XOFF_IN) { + seq_printf(m, "%cXON_XOFF_IN", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT) { + seq_printf(m, "%cXON_XOFF_OUT", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_RTS_CTS_IN) { + seq_printf(m, "%cRTS_CTS_IN", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT) { + seq_printf(m, "%cRTS_CTS_OUT", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_DSR_DTR_IN) { + seq_printf(m, "%cDSR_DTR_IN", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT) { + seq_printf(m, "%cDSR_DTR_OUT", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN) { + seq_printf(m, "%cENQ_ACK_IN", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT) { + seq_printf(m, "%cENQ_ACK_OUT", sep); + sep = '|'; + } + seq_putc(m, '\n'); + + seq_puts(m, "Flags:"); + sep = ' '; + if (self->flags & ASYNC_CTS_FLOW) { + seq_printf(m, "%cASYNC_CTS_FLOW", sep); + sep = '|'; + } + if (self->flags & ASYNC_CHECK_CD) { + seq_printf(m, "%cASYNC_CHECK_CD", sep); + sep = '|'; + } + if (self->flags & ASYNC_INITIALIZED) { + seq_printf(m, "%cASYNC_INITIALIZED", sep); + sep = '|'; + } + if (self->flags & ASYNC_LOW_LATENCY) { + seq_printf(m, "%cASYNC_LOW_LATENCY", sep); + sep = '|'; + } + if (self->flags & ASYNC_CLOSING) { + seq_printf(m, "%cASYNC_CLOSING", sep); + sep = '|'; + } + if (self->flags & ASYNC_NORMAL_ACTIVE) { + seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep); + sep = '|'; + } + seq_putc(m, '\n'); + + seq_printf(m, "Role: %s\n", self->client ? "client" : "server"); + seq_printf(m, "Open count: %d\n", self->open_count); + seq_printf(m, "Max data size: %d\n", self->max_data_size); + seq_printf(m, "Max header size: %d\n", self->max_header_size); if (self->tty) - ret += sprintf(buf+ret, "Hardware: %s\n", + seq_printf(m, "Hardware: %s\n", self->tty->hw_stopped ? "Stopped" : "Running"); - - ret += sprintf(buf+ret, "\n"); - return ret; } - -/* - * Function ircomm_tty_read_proc (buf, start, offset, len, eof, unused) - * - * - * - */ -static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len, - int *eof, void *unused) +static int ircomm_tty_proc_show(struct seq_file *m, void *v) { struct ircomm_tty_cb *self; - int count = 0, l; - off_t begin = 0; unsigned long flags; spin_lock_irqsave(&ircomm_tty->hb_spinlock, flags); self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty); - while ((self != NULL) && (count < 4000)) { + while (self != NULL) { if (self->magic != IRCOMM_TTY_MAGIC) break; - l = ircomm_tty_line_info(self, buf + count); - count += l; - if (count+begin > offset+len) - goto done; - if (count+begin < offset) { - begin += count; - count = 0; - } - + ircomm_tty_line_info(self, m); self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty); } - *eof = 1; -done: spin_unlock_irqrestore(&ircomm_tty->hb_spinlock, flags); + return 0; +} - if (offset >= count+begin) - return 0; - *start = buf + (offset-begin); - return ((len < begin+count-offset) ? len : begin+count-offset); +static int ircomm_tty_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ircomm_tty_proc_show, NULL); } + +static const struct file_operations ircomm_tty_proc_fops = { + .owner = THIS_MODULE, + .open = ircomm_tty_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* CONFIG_PROC_FS */ MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>"); diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 5592883e1e4a..afd91c78ce8e 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -17,28 +17,6 @@ config SUNRPC_XPRT_RDMA If unsure, say N. -config SUNRPC_REGISTER_V4 - bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL - default n - help - Sun added support for registering RPC services at an IPv6 - address by creating two new versions of the rpcbind protocol - (RFC 1833). - - This option enables support in the kernel RPC server for - registering kernel RPC services via version 4 of the rpcbind - protocol. If you enable this option, you must run a portmapper - daemon that supports rpcbind protocol version 4. - - Serving NFS over IPv6 from knfsd (the kernel's NFS server) - requires that you enable this option and use a portmapper that - supports rpcbind version 4. - - If unsure, say N to get traditional behavior (register kernel - RPC services using only rpcbind version 2). Distributions - using the legacy Linux portmapper daemon must say N here. - config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 836f15c0c4a3..5abab094441f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1032,27 +1032,20 @@ call_connect_status(struct rpc_task *task) dprint_status(task); task->tk_status = 0; - if (status >= 0) { + if (status >= 0 || status == -EAGAIN) { clnt->cl_stats->netreconn++; task->tk_action = call_transmit; return; } - /* Something failed: remote service port may have changed */ - rpc_force_rebind(clnt); - switch (status) { - case -ENOTCONN: - case -EAGAIN: - task->tk_action = call_bind; - if (!RPC_IS_SOFT(task)) - return; /* if soft mounted, test if we've timed out */ case -ETIMEDOUT: task->tk_action = call_timeout; - return; + break; + default: + rpc_exit(task, -EIO); } - rpc_exit(task, -EIO); } /* @@ -1105,14 +1098,26 @@ static void call_transmit_status(struct rpc_task *task) { task->tk_action = call_status; - /* - * Special case: if we've been waiting on the socket's write_space() - * callback, then don't call xprt_end_transmit(). - */ - if (task->tk_status == -EAGAIN) - return; - xprt_end_transmit(task); - rpc_task_force_reencode(task); + switch (task->tk_status) { + case -EAGAIN: + break; + default: + xprt_end_transmit(task); + /* + * Special cases: if we've been waiting on the + * socket's write_space() callback, or if the + * socket just returned a connection error, + * then hold onto the transport lock. + */ + case -ECONNREFUSED: + case -ECONNRESET: + case -ENOTCONN: + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + case -EPIPE: + rpc_task_force_reencode(task); + } } /* @@ -1152,9 +1157,12 @@ call_status(struct rpc_task *task) xprt_conditional_disconnect(task->tk_xprt, req->rq_connect_cookie); break; + case -ECONNRESET: case -ECONNREFUSED: - case -ENOTCONN: rpc_force_rebind(clnt); + rpc_delay(task, 3*HZ); + case -EPIPE: + case -ENOTCONN: task->tk_action = call_bind; break; case -EAGAIN: diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 03ae007641e4..beee6da33035 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -63,9 +63,16 @@ enum { * r_owner * * The "owner" is allowed to unset a service in the rpcbind database. - * We always use the following (arbitrary) fixed string. + * + * For AF_LOCAL SET/UNSET requests, rpcbind treats this string as a + * UID which it maps to a local user name via a password lookup. + * In all other cases it is ignored. + * + * For SET/UNSET requests, user space provides a value, even for + * network requests, and GETADDR uses an empty string. We follow + * those precedents here. */ -#define RPCB_OWNER_STRING "rpcb" +#define RPCB_OWNER_STRING "0" #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) static void rpcb_getport_done(struct rpc_task *, void *); @@ -124,12 +131,6 @@ static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_port = htons(RPCBIND_PORT), }; -static const struct sockaddr_in6 rpcb_in6addr_loopback = { - .sin6_family = AF_INET6, - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - .sin6_port = htons(RPCBIND_PORT), -}; - static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, size_t addrlen, u32 version) { @@ -176,9 +177,10 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return rpc_create(&args); } -static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, - u32 version, struct rpc_message *msg) +static int rpcb_register_call(const u32 version, struct rpc_message *msg) { + struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback; + size_t addrlen = sizeof(rpcb_inaddr_loopback); struct rpc_clnt *rpcb_clnt; int result, error = 0; @@ -192,7 +194,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, error = PTR_ERR(rpcb_clnt); if (error < 0) { - printk(KERN_WARNING "RPC: failed to contact local rpcbind " + dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); return error; } @@ -254,25 +256,23 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) if (port) msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, - sizeof(rpcb_inaddr_loopback), - RPCBVERS_2, &msg); + return rpcb_register_call(RPCBVERS_2, &msg); } /* * Fill in AF_INET family-specific arguments to register */ -static int rpcb_register_netid4(struct sockaddr_in *address_to_register, - struct rpc_message *msg) +static int rpcb_register_inet4(const struct sockaddr *sap, + struct rpc_message *msg) { + const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; - unsigned short port = ntohs(address_to_register->sin_port); + unsigned short port = ntohs(sin->sin_port); char buf[32]; /* Construct AF_INET universal address */ snprintf(buf, sizeof(buf), "%pI4.%u.%u", - &address_to_register->sin_addr.s_addr, - port >> 8, port & 0xff); + &sin->sin_addr.s_addr, port >> 8, port & 0xff); map->r_addr = buf; dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " @@ -284,29 +284,27 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, - sizeof(rpcb_inaddr_loopback), - RPCBVERS_4, msg); + return rpcb_register_call(RPCBVERS_4, msg); } /* * Fill in AF_INET6 family-specific arguments to register */ -static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, - struct rpc_message *msg) +static int rpcb_register_inet6(const struct sockaddr *sap, + struct rpc_message *msg) { + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; - unsigned short port = ntohs(address_to_register->sin6_port); + unsigned short port = ntohs(sin6->sin6_port); char buf[64]; /* Construct AF_INET6 universal address */ - if (ipv6_addr_any(&address_to_register->sin6_addr)) + if (ipv6_addr_any(&sin6->sin6_addr)) snprintf(buf, sizeof(buf), "::.%u.%u", port >> 8, port & 0xff); else snprintf(buf, sizeof(buf), "%pI6.%u.%u", - &address_to_register->sin6_addr, - port >> 8, port & 0xff); + &sin6->sin6_addr, port >> 8, port & 0xff); map->r_addr = buf; dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " @@ -318,9 +316,21 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, - sizeof(rpcb_in6addr_loopback), - RPCBVERS_4, msg); + return rpcb_register_call(RPCBVERS_4, msg); +} + +static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) +{ + struct rpcbind_args *map = msg->rpc_argp; + + dprintk("RPC: unregistering [%u, %u, '%s'] with " + "local rpcbind\n", + map->r_prog, map->r_vers, map->r_netid); + + map->r_addr = ""; + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; + + return rpcb_register_call(RPCBVERS_4, msg); } /** @@ -340,10 +350,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, * invoke this function once for each [program, version, address, * netid] tuple they wish to advertise. * - * Callers may also unregister RPC services that are no longer - * available by setting the port number in the passed-in address - * to zero. Callers pass a netid of "" to unregister all - * transport netids associated with [program, version, address]. + * Callers may also unregister RPC services that are registered at a + * specific address by setting the port number in @address to zero. + * They may unregister all registered protocol families at once for + * a service by passing a NULL @address argument. If @netid is "" + * then all netids for [program, version, address] are unregistered. * * This function uses rpcbind protocol version 4 to contact the * local rpcbind daemon. The local rpcbind daemon must support @@ -378,13 +389,14 @@ int rpcb_v4_register(const u32 program, const u32 version, .rpc_argp = &map, }; + if (address == NULL) + return rpcb_unregister_all_protofamilies(&msg); + switch (address->sa_family) { case AF_INET: - return rpcb_register_netid4((struct sockaddr_in *)address, - &msg); + return rpcb_register_inet4(address, &msg); case AF_INET6: - return rpcb_register_netid6((struct sockaddr_in6 *)address, - &msg); + return rpcb_register_inet6(address, &msg); } return -EAFNOSUPPORT; @@ -579,7 +591,7 @@ void rpcb_getport_async(struct rpc_task *task) map->r_xprt = xprt_get(xprt); map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); - map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ + map->r_owner = ""; map->r_status = -EIO; child = rpcb_call_async(rpcb_clnt, map, proc); @@ -703,11 +715,16 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, *portp = 0; addr_len = ntohl(*p++); + if (addr_len == 0) { + dprintk("RPC: rpcb_decode_getaddr: " + "service is not registered\n"); + return 0; + } + /* - * Simple sanity check. The smallest possible universal - * address is an IPv4 address string containing 11 bytes. + * Simple sanity check. */ - if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN) + if (addr_len > RPCBIND_MAXUADDRLEN) goto out_err; /* diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index bb507e2bb94d..9f2f2412a2f3 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -359,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - sa_family_t family, void (*shutdown)(struct svc_serv *serv)) + void (*shutdown)(struct svc_serv *serv)) { struct svc_serv *serv; unsigned int vers; @@ -368,7 +368,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) return NULL; - serv->sv_family = family; serv->sv_name = prog->pg_name; serv->sv_program = prog; serv->sv_nrthreads = 1; @@ -427,21 +426,21 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - sa_family_t family, void (*shutdown)(struct svc_serv *serv)) + void (*shutdown)(struct svc_serv *serv)) { - return __svc_create(prog, bufsize, /*npools*/1, family, shutdown); + return __svc_create(prog, bufsize, /*npools*/1, shutdown); } EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - sa_family_t family, void (*shutdown)(struct svc_serv *serv), + void (*shutdown)(struct svc_serv *serv), svc_thread_fn func, struct module *mod) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); - serv = __svc_create(prog, bufsize, npools, family, shutdown); + serv = __svc_create(prog, bufsize, npools, shutdown); if (serv != NULL) { serv->sv_function = func; @@ -719,8 +718,6 @@ svc_exit_thread(struct svc_rqst *rqstp) } EXPORT_SYMBOL_GPL(svc_exit_thread); -#ifdef CONFIG_SUNRPC_REGISTER_V4 - /* * Register an "inet" protocol family netid with the local * rpcbind daemon via an rpcbind v4 SET request. @@ -735,12 +732,13 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, const unsigned short protocol, const unsigned short port) { - struct sockaddr_in sin = { + const struct sockaddr_in sin = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_ANY), .sin_port = htons(port), }; - char *netid; + const char *netid; + int error; switch (protocol) { case IPPROTO_UDP: @@ -750,13 +748,23 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, netid = RPCBIND_NETID_TCP; break; default: - return -EPROTONOSUPPORT; + return -ENOPROTOOPT; } - return rpcb_v4_register(program, version, - (struct sockaddr *)&sin, netid); + error = rpcb_v4_register(program, version, + (const struct sockaddr *)&sin, netid); + + /* + * User space didn't support rpcbind v4, so retry this + * registration request with the legacy rpcbind v2 protocol. + */ + if (error == -EPROTONOSUPPORT) + error = rpcb_register(program, version, protocol, port); + + return error; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) /* * Register an "inet6" protocol family netid with the local * rpcbind daemon via an rpcbind v4 SET request. @@ -771,12 +779,13 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, const unsigned short protocol, const unsigned short port) { - struct sockaddr_in6 sin6 = { + const struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_ANY_INIT, .sin6_port = htons(port), }; - char *netid; + const char *netid; + int error; switch (protocol) { case IPPROTO_UDP: @@ -786,12 +795,22 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, netid = RPCBIND_NETID_TCP6; break; default: - return -EPROTONOSUPPORT; + return -ENOPROTOOPT; } - return rpcb_v4_register(program, version, - (struct sockaddr *)&sin6, netid); + error = rpcb_v4_register(program, version, + (const struct sockaddr *)&sin6, netid); + + /* + * User space didn't support rpcbind version 4, so we won't + * use a PF_INET6 listener. + */ + if (error == -EPROTONOSUPPORT) + error = -EAFNOSUPPORT; + + return error; } +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ /* * Register a kernel RPC service via rpcbind version 4. @@ -799,69 +818,43 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, * Returns zero on success; a negative errno value is returned * if any error occurs. */ -static int __svc_register(const u32 program, const u32 version, - const sa_family_t family, +static int __svc_register(const char *progname, + const u32 program, const u32 version, + const int family, const unsigned short protocol, const unsigned short port) { - int error; + int error = -EAFNOSUPPORT; switch (family) { - case AF_INET: - return __svc_rpcb_register4(program, version, + case PF_INET: + error = __svc_rpcb_register4(program, version, protocol, port); - case AF_INET6: + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case PF_INET6: error = __svc_rpcb_register6(program, version, protocol, port); - if (error < 0) - return error; - - /* - * Work around bug in some versions of Linux rpcbind - * which don't allow registration of both inet and - * inet6 netids. - * - * Error return ignored for now. - */ - __svc_rpcb_register4(program, version, - protocol, port); - return 0; +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ } - return -EAFNOSUPPORT; -} - -#else /* CONFIG_SUNRPC_REGISTER_V4 */ - -/* - * Register a kernel RPC service via rpcbind version 2. - * - * Returns zero on success; a negative errno value is returned - * if any error occurs. - */ -static int __svc_register(const u32 program, const u32 version, - sa_family_t family, - const unsigned short protocol, - const unsigned short port) -{ - if (family != AF_INET) - return -EAFNOSUPPORT; - - return rpcb_register(program, version, protocol, port); + if (error < 0) + printk(KERN_WARNING "svc: failed to register %sv%u RPC " + "service (errno %d).\n", progname, version, -error); + return error; } -#endif /* CONFIG_SUNRPC_REGISTER_V4 */ - /** * svc_register - register an RPC service with the local portmapper * @serv: svc_serv struct for the service to register + * @family: protocol family of service's listener socket * @proto: transport protocol number to advertise * @port: port to advertise * - * Service is registered for any address in serv's address family + * Service is registered for any address in the passed-in protocol family */ -int svc_register(const struct svc_serv *serv, const unsigned short proto, - const unsigned short port) +int svc_register(const struct svc_serv *serv, const int family, + const unsigned short proto, const unsigned short port) { struct svc_program *progp; unsigned int i; @@ -879,15 +872,15 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto, i, proto == IPPROTO_UDP? "udp" : "tcp", port, - serv->sv_family, + family, progp->pg_vers[i]->vs_hidden? " (but not telling portmap)" : ""); if (progp->pg_vers[i]->vs_hidden) continue; - error = __svc_register(progp->pg_prog, i, - serv->sv_family, proto, port); + error = __svc_register(progp->pg_name, progp->pg_prog, + i, family, proto, port); if (error < 0) break; } @@ -896,38 +889,31 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto, return error; } -#ifdef CONFIG_SUNRPC_REGISTER_V4 - +/* + * If user space is running rpcbind, it should take the v4 UNSET + * and clear everything for this [program, version]. If user space + * is running portmap, it will reject the v4 UNSET, but won't have + * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient + * in this case to clear all existing entries for [program, version]. + */ static void __svc_unregister(const u32 program, const u32 version, const char *progname) { - struct sockaddr_in6 sin6 = { - .sin6_family = AF_INET6, - .sin6_addr = IN6ADDR_ANY_INIT, - .sin6_port = 0, - }; int error; - error = rpcb_v4_register(program, version, - (struct sockaddr *)&sin6, ""); - dprintk("svc: %s(%sv%u), error %d\n", - __func__, progname, version, error); -} - -#else /* CONFIG_SUNRPC_REGISTER_V4 */ + error = rpcb_v4_register(program, version, NULL, ""); -static void __svc_unregister(const u32 program, const u32 version, - const char *progname) -{ - int error; + /* + * User space didn't support rpcbind v4, so retry this + * request with the legacy rpcbind v2 protocol. + */ + if (error == -EPROTONOSUPPORT) + error = rpcb_register(program, version, 0, 0); - error = rpcb_register(program, version, 0, 0); dprintk("svc: %s(%sv%u), error %d\n", __func__, progname, version, error); } -#endif /* CONFIG_SUNRPC_REGISTER_V4 */ - /* * All netids, bind addresses and ports registered for [program, version] * are removed from the local rpcbind database (if the service is not diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index e588df5d6b34..2819ee093f36 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -161,7 +161,9 @@ EXPORT_SYMBOL_GPL(svc_xprt_init); static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, struct svc_serv *serv, - unsigned short port, int flags) + const int family, + const unsigned short port, + int flags) { struct sockaddr_in sin = { .sin_family = AF_INET, @@ -176,12 +178,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, struct sockaddr *sap; size_t len; - switch (serv->sv_family) { - case AF_INET: + switch (family) { + case PF_INET: sap = (struct sockaddr *)&sin; len = sizeof(sin); break; - case AF_INET6: + case PF_INET6: sap = (struct sockaddr *)&sin6; len = sizeof(sin6); break; @@ -192,7 +194,8 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, return xcl->xcl_ops->xpo_create(serv, sap, len, flags); } -int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, +int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, + const int family, const unsigned short port, int flags) { struct svc_xprt_class *xcl; @@ -209,7 +212,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, goto err; spin_unlock(&svc_xprt_class_lock); - newxprt = __svc_xpo_create(xcl, serv, port, flags); + newxprt = __svc_xpo_create(xcl, serv, family, port, flags); if (IS_ERR(newxprt)) { module_put(xcl->xcl_owner); return PTR_ERR(newxprt); @@ -1033,7 +1036,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) return dr; } -/* +/** + * svc_find_xprt - find an RPC transport instance + * @serv: pointer to svc_serv to search + * @xcl_name: C string containing transport's class name + * @af: Address family of transport's local address + * @port: transport's IP port number + * * Return the transport instance pointer for the endpoint accepting * connections/peer traffic from the specified transport class, * address family and port. @@ -1042,14 +1051,14 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) * wild-card, and will result in matching the first transport in the * service's list that has a matching class name. */ -struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name, - int af, int port) +struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, + const sa_family_t af, const unsigned short port) { struct svc_xprt *xprt; struct svc_xprt *found = NULL; /* Sanity check the args */ - if (!serv || !xcl_name) + if (serv == NULL || xcl_name == NULL) return found; spin_lock_bh(&serv->sv_lock); @@ -1058,7 +1067,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name, continue; if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family) continue; - if (port && port != svc_xprt_local_port(xprt)) + if (port != 0 && port != svc_xprt_local_port(xprt)) continue; found = xprt; svc_xprt_get(xprt); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5763e6460fea..9d504234af4a 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1110,7 +1110,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, struct svc_sock *svsk; struct sock *inet; int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); - int val; dprintk("svc: svc_setup_socket %p\n", sock); if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { @@ -1122,7 +1121,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Register socket with portmapper */ if (*errp >= 0 && pmap_register) - *errp = svc_register(serv, inet->sk_protocol, + *errp = svc_register(serv, inet->sk_family, inet->sk_protocol, ntohs(inet_sk(inet)->sport)); if (*errp < 0) { @@ -1143,18 +1142,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, else svc_tcp_init(svsk, serv); - /* - * We start one listener per sv_serv. We want AF_INET - * requests to be automatically shunted to our AF_INET6 - * listener using a mapped IPv4 address. Make sure - * no-one starts an equivalent IPv4 listener, which - * would steal our incoming connections. - */ - val = 0; - if (serv->sv_family == AF_INET6) - kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, - (char *)&val, sizeof(val)); - dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); @@ -1222,6 +1209,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, struct sockaddr_storage addr; struct sockaddr *newsin = (struct sockaddr *)&addr; int newlen; + int family; + int val; RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); dprintk("svc: svc_create_socket(%s, %d, %s)\n", @@ -1233,14 +1222,35 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, "sockets supported\n"); return ERR_PTR(-EINVAL); } + type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; + switch (sin->sa_family) { + case AF_INET6: + family = PF_INET6; + break; + case AF_INET: + family = PF_INET; + break; + default: + return ERR_PTR(-EINVAL); + } - error = sock_create_kern(sin->sa_family, type, protocol, &sock); + error = sock_create_kern(family, type, protocol, &sock); if (error < 0) return ERR_PTR(error); svc_reclassify_socket(sock); + /* + * If this is an PF_INET6 listener, we want to avoid + * getting requests from IPv4 remotes. Those should + * be shunted to a PF_INET listener via rpcbind. + */ + val = 1; + if (family == PF_INET6) + kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, + (char *)&val, sizeof(val)); + if (type == SOCK_STREAM) sock->sk->sk_reuse = 1; /* allow address reuse */ error = kernel_bind(sock, sin, len); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 62098d101a1f..a0bfe53f1621 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -152,6 +152,37 @@ out: EXPORT_SYMBOL_GPL(xprt_unregister_transport); /** + * xprt_load_transport - load a transport implementation + * @transport_name: transport to load + * + * Returns: + * 0: transport successfully loaded + * -ENOENT: transport module not available + */ +int xprt_load_transport(const char *transport_name) +{ + struct xprt_class *t; + char module_name[sizeof t->name + 5]; + int result; + + result = 0; + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + if (strcmp(t->name, transport_name) == 0) { + spin_unlock(&xprt_list_lock); + goto out; + } + } + spin_unlock(&xprt_list_lock); + strcpy(module_name, "xprt"); + strncat(module_name, transport_name, sizeof t->name); + result = request_module(module_name); +out: + return result; +} +EXPORT_SYMBOL_GPL(xprt_load_transport); + +/** * xprt_reserve_xprt - serialize write access to transports * @task: task that is requesting access to the transport * @@ -580,7 +611,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt) dprintk("RPC: disconnected transport %p\n", xprt); spin_lock_bh(&xprt->transport_lock); xprt_clear_connected(xprt); - xprt_wake_pending_tasks(xprt, -ENOTCONN); + xprt_wake_pending_tasks(xprt, -EAGAIN); spin_unlock_bh(&xprt->transport_lock); } EXPORT_SYMBOL_GPL(xprt_disconnect_done); @@ -598,7 +629,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) /* Try to schedule an autoclose RPC call */ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) queue_work(rpciod_workqueue, &xprt->task_cleanup); - xprt_wake_pending_tasks(xprt, -ENOTCONN); + xprt_wake_pending_tasks(xprt, -EAGAIN); spin_unlock_bh(&xprt->transport_lock); } @@ -625,7 +656,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie) /* Try to schedule an autoclose RPC call */ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) queue_work(rpciod_workqueue, &xprt->task_cleanup); - xprt_wake_pending_tasks(xprt, -ENOTCONN); + xprt_wake_pending_tasks(xprt, -EAGAIN); out: spin_unlock_bh(&xprt->transport_lock); } @@ -695,9 +726,8 @@ static void xprt_connect_status(struct rpc_task *task) } switch (task->tk_status) { - case -ENOTCONN: - dprintk("RPC: %5u xprt_connect_status: connection broken\n", - task->tk_pid); + case -EAGAIN: + dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid); break; case -ETIMEDOUT: dprintk("RPC: %5u xprt_connect_status: connect attempt timed " @@ -818,15 +848,8 @@ int xprt_prepare_transmit(struct rpc_task *task) err = req->rq_received; goto out_unlock; } - if (!xprt->ops->reserve_xprt(task)) { + if (!xprt->ops->reserve_xprt(task)) err = -EAGAIN; - goto out_unlock; - } - - if (!xprt_connected(xprt)) { - err = -ENOTCONN; - goto out_unlock; - } out_unlock: spin_unlock_bh(&xprt->transport_lock); return err; @@ -870,32 +893,26 @@ void xprt_transmit(struct rpc_task *task) req->rq_connect_cookie = xprt->connect_cookie; req->rq_xtime = jiffies; status = xprt->ops->send_request(task); - if (status == 0) { - dprintk("RPC: %5u xmit complete\n", task->tk_pid); - spin_lock_bh(&xprt->transport_lock); + if (status != 0) { + task->tk_status = status; + return; + } - xprt->ops->set_retrans_timeout(task); + dprintk("RPC: %5u xmit complete\n", task->tk_pid); + spin_lock_bh(&xprt->transport_lock); - xprt->stat.sends++; - xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs; - xprt->stat.bklog_u += xprt->backlog.qlen; + xprt->ops->set_retrans_timeout(task); - /* Don't race with disconnect */ - if (!xprt_connected(xprt)) - task->tk_status = -ENOTCONN; - else if (!req->rq_received) - rpc_sleep_on(&xprt->pending, task, xprt_timer); - spin_unlock_bh(&xprt->transport_lock); - return; - } + xprt->stat.sends++; + xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs; + xprt->stat.bklog_u += xprt->backlog.qlen; - /* Note: at this point, task->tk_sleeping has not yet been set, - * hence there is no danger of the waking up task being put on - * schedq, and being picked up by a parallel run of rpciod(). - */ - task->tk_status = status; - if (status == -ECONNREFUSED) - rpc_sleep_on(&xprt->sending, task, NULL); + /* Don't race with disconnect */ + if (!xprt_connected(xprt)) + task->tk_status = -ENOTCONN; + else if (!req->rq_received) + rpc_sleep_on(&xprt->pending, task, xprt_timer); + spin_unlock_bh(&xprt->transport_lock); } static inline void do_xprt_reserve(struct rpc_task *task) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 14106d26bb95..e5e28d1946a4 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -310,6 +310,19 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) __func__, pad, destp, rqst->rq_slen, curlen); copy_len = rqst->rq_snd_buf.page_len; + + if (rqst->rq_snd_buf.tail[0].iov_len) { + curlen = rqst->rq_snd_buf.tail[0].iov_len; + if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) { + memmove(destp + copy_len, + rqst->rq_snd_buf.tail[0].iov_base, curlen); + r_xprt->rx_stats.pullup_copy_count += curlen; + } + dprintk("RPC: %s: tail destp 0x%p len %d\n", + __func__, destp + copy_len, curlen); + rqst->rq_svec[0].iov_len += curlen; + } + r_xprt->rx_stats.pullup_copy_count += copy_len; npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; for (i = 0; copy_len && i < npages; i++) { @@ -332,17 +345,6 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) destp += curlen; copy_len -= curlen; } - if (rqst->rq_snd_buf.tail[0].iov_len) { - curlen = rqst->rq_snd_buf.tail[0].iov_len; - if (destp != rqst->rq_snd_buf.tail[0].iov_base) { - memcpy(destp, - rqst->rq_snd_buf.tail[0].iov_base, curlen); - r_xprt->rx_stats.pullup_copy_count += curlen; - } - dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n", - __func__, destp, copy_len, curlen); - rqst->rq_svec[0].iov_len += curlen; - } /* header now contains entire send message */ return pad; } @@ -656,7 +658,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) if (curlen > rqst->rq_rcv_buf.tail[0].iov_len) curlen = rqst->rq_rcv_buf.tail[0].iov_len; if (rqst->rq_rcv_buf.tail[0].iov_base != srcp) - memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen); + memmove(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen); dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n", __func__, srcp, copy_len, curlen); rqst->rq_rcv_buf.tail[0].iov_len = curlen; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index a3334e3b73cc..6c26a675435a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -191,7 +191,6 @@ static int map_xdr(struct svcxprt_rdma *xprt, struct xdr_buf *xdr, struct svc_rdma_req_map *vec) { - int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; int sge_no; u32 sge_bytes; u32 page_bytes; @@ -235,7 +234,11 @@ static int map_xdr(struct svcxprt_rdma *xprt, sge_no++; } - BUG_ON(sge_no > sge_max); + dprintk("svcrdma: map_xdr: sge_no %d page_no %d " + "page_base %u page_len %u head_len %zu tail_len %zu\n", + sge_no, page_no, xdr->page_base, xdr->page_len, + xdr->head[0].iov_len, xdr->tail[0].iov_len); + vec->count = sge_no; return 0; } @@ -579,7 +582,6 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[page_no+1].length = 0; } BUG_ON(sge_no > rdma->sc_max_sge); - BUG_ON(sge_no > ctxt->count); memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; send_wr.wr_id = (unsigned long)ctxt; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 568330eebbfe..d40ff50887aa 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -49,6 +49,9 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; +#define XS_TCP_LINGER_TO (15U * HZ) +static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; + /* * We can register our own files under /proc/sys/sunrpc by * calling register_sysctl_table() again. The files in that @@ -117,6 +120,14 @@ static ctl_table xs_tunables_table[] = { .extra2 = &xprt_max_resvport_limit }, { + .procname = "tcp_fin_timeout", + .data = &xs_tcp_fin_timeout, + .maxlen = sizeof(xs_tcp_fin_timeout), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = sysctl_jiffies + }, + { .ctl_name = 0, }, }; @@ -521,11 +532,12 @@ static void xs_nospace_callback(struct rpc_task *task) * @task: task to put to sleep * */ -static void xs_nospace(struct rpc_task *task) +static int xs_nospace(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret = 0; dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", task->tk_pid, req->rq_slen - req->rq_bytes_sent, @@ -537,6 +549,7 @@ static void xs_nospace(struct rpc_task *task) /* Don't race with disconnect */ if (xprt_connected(xprt)) { if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { + ret = -EAGAIN; /* * Notify TCP that we're limited by the application * window size @@ -548,10 +561,11 @@ static void xs_nospace(struct rpc_task *task) } } else { clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - task->tk_status = -ENOTCONN; + ret = -ENOTCONN; } spin_unlock_bh(&xprt->transport_lock); + return ret; } /** @@ -594,6 +608,8 @@ static int xs_udp_send_request(struct rpc_task *task) /* Still some bytes left; set up for a retry later. */ status = -EAGAIN; } + if (!transport->sock) + goto out; switch (status) { case -ENOTSOCK: @@ -601,21 +617,19 @@ static int xs_udp_send_request(struct rpc_task *task) /* Should we call xs_close() here? */ break; case -EAGAIN: - xs_nospace(task); + status = xs_nospace(task); break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); case -ENETUNREACH: case -EPIPE: case -ECONNREFUSED: /* When the server has died, an ICMP port unreachable message * prompts ECONNREFUSED. */ clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - break; - default: - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - dprintk("RPC: sendmsg returned unrecognized error %d\n", - -status); } - +out: return status; } @@ -697,6 +711,8 @@ static int xs_tcp_send_request(struct rpc_task *task) status = -EAGAIN; break; } + if (!transport->sock) + goto out; switch (status) { case -ENOTSOCK: @@ -704,23 +720,19 @@ static int xs_tcp_send_request(struct rpc_task *task) /* Should we call xs_close() here? */ break; case -EAGAIN: - xs_nospace(task); + status = xs_nospace(task); break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); case -ECONNRESET: + case -EPIPE: xs_tcp_shutdown(xprt); case -ECONNREFUSED: case -ENOTCONN: - case -EPIPE: - status = -ENOTCONN; - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - break; - default: - dprintk("RPC: sendmsg returned unrecognized error %d\n", - -status); clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - xs_tcp_shutdown(xprt); } - +out: return status; } @@ -767,23 +779,13 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s sk->sk_error_report = transport->old_error_report; } -/** - * xs_close - close a socket - * @xprt: transport - * - * This is used when all requests are complete; ie, no DRC state remains - * on the server we want to save. - */ -static void xs_close(struct rpc_xprt *xprt) +static void xs_reset_transport(struct sock_xprt *transport) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct socket *sock = transport->sock; struct sock *sk = transport->inet; - if (!sk) - goto clear_close_wait; - - dprintk("RPC: xs_close xprt %p\n", xprt); + if (sk == NULL) + return; write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; @@ -797,8 +799,25 @@ static void xs_close(struct rpc_xprt *xprt) sk->sk_no_check = 0; sock_release(sock); -clear_close_wait: +} + +/** + * xs_close - close a socket + * @xprt: transport + * + * This is used when all requests are complete; ie, no DRC state remains + * on the server we want to save. + */ +static void xs_close(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + dprintk("RPC: xs_close xprt %p\n", xprt); + + xs_reset_transport(transport); + smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); smp_mb__after_clear_bit(); @@ -1126,6 +1145,47 @@ out: read_unlock(&sk->sk_callback_lock); } +/* + * Do the equivalent of linger/linger2 handling for dealing with + * broken servers that don't close the socket in a timely + * fashion + */ +static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt, + unsigned long timeout) +{ + struct sock_xprt *transport; + + if (xprt_test_and_set_connecting(xprt)) + return; + set_bit(XPRT_CONNECTION_ABORT, &xprt->state); + transport = container_of(xprt, struct sock_xprt, xprt); + queue_delayed_work(rpciod_workqueue, &transport->connect_worker, + timeout); +} + +static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport; + + transport = container_of(xprt, struct sock_xprt, xprt); + + if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) || + !cancel_delayed_work(&transport->connect_worker)) + return; + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + xprt_clear_connecting(xprt); +} + +static void xs_sock_mark_closed(struct rpc_xprt *xprt) +{ + smp_mb__before_clear_bit(); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); + smp_mb__after_clear_bit(); + /* Mark transport as closed and wake up all pending tasks */ + xprt_disconnect_done(xprt); +} + /** * xs_tcp_state_change - callback to handle TCP socket state changes * @sk: socket whose state has changed @@ -1158,7 +1218,7 @@ static void xs_tcp_state_change(struct sock *sk) transport->tcp_flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; - xprt_wake_pending_tasks(xprt, 0); + xprt_wake_pending_tasks(xprt, -EAGAIN); } spin_unlock_bh(&xprt->transport_lock); break; @@ -1171,10 +1231,10 @@ static void xs_tcp_state_change(struct sock *sk) clear_bit(XPRT_CONNECTED, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); smp_mb__after_clear_bit(); + xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout); break; case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ - set_bit(XPRT_CLOSING, &xprt->state); xprt_force_disconnect(xprt); case TCP_SYN_SENT: xprt->connect_cookie++; @@ -1187,40 +1247,35 @@ static void xs_tcp_state_change(struct sock *sk) xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; break; case TCP_LAST_ACK: + set_bit(XPRT_CLOSING, &xprt->state); + xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout); smp_mb__before_clear_bit(); clear_bit(XPRT_CONNECTED, &xprt->state); smp_mb__after_clear_bit(); break; case TCP_CLOSE: - smp_mb__before_clear_bit(); - clear_bit(XPRT_CLOSE_WAIT, &xprt->state); - clear_bit(XPRT_CLOSING, &xprt->state); - smp_mb__after_clear_bit(); - /* Mark transport as closed and wake up all pending tasks */ - xprt_disconnect_done(xprt); + xs_tcp_cancel_linger_timeout(xprt); + xs_sock_mark_closed(xprt); } out: read_unlock(&sk->sk_callback_lock); } /** - * xs_tcp_error_report - callback mainly for catching RST events + * xs_error_report - callback mainly for catching socket errors * @sk: socket */ -static void xs_tcp_error_report(struct sock *sk) +static void xs_error_report(struct sock *sk) { struct rpc_xprt *xprt; read_lock(&sk->sk_callback_lock); - if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED) - goto out; if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: %s client %p...\n" "RPC: error %d\n", __func__, xprt, sk->sk_err); - - xprt_force_disconnect(xprt); + xprt_wake_pending_tasks(xprt, -EAGAIN); out: read_unlock(&sk->sk_callback_lock); } @@ -1494,6 +1549,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_user_data = xprt; sk->sk_data_ready = xs_udp_data_ready; sk->sk_write_space = xs_udp_write_space; + sk->sk_error_report = xs_error_report; sk->sk_no_check = UDP_CSUM_NORCV; sk->sk_allocation = GFP_ATOMIC; @@ -1526,9 +1582,10 @@ static void xs_udp_connect_worker4(struct work_struct *work) goto out; /* Start by resetting any existing state */ - xs_close(xprt); + xs_reset_transport(transport); - if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (err < 0) { dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } @@ -1545,8 +1602,8 @@ static void xs_udp_connect_worker4(struct work_struct *work) xs_udp_finish_connecting(xprt, sock); status = 0; out: - xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /** @@ -1567,9 +1624,10 @@ static void xs_udp_connect_worker6(struct work_struct *work) goto out; /* Start by resetting any existing state */ - xs_close(xprt); + xs_reset_transport(transport); - if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (err < 0) { dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } @@ -1586,18 +1644,17 @@ static void xs_udp_connect_worker6(struct work_struct *work) xs_udp_finish_connecting(xprt, sock); status = 0; out: - xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /* * We need to preserve the port number so the reply cache on the server can * find our cached RPC replies when we get around to reconnecting. */ -static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) +static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) { int result; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct sockaddr any; dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); @@ -1609,11 +1666,24 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) memset(&any, 0, sizeof(any)); any.sa_family = AF_UNSPEC; result = kernel_connect(transport->sock, &any, sizeof(any), 0); - if (result) + if (!result) + xs_sock_mark_closed(xprt); + else dprintk("RPC: AF_UNSPEC connect return code %d\n", result); } +static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) +{ + unsigned int state = transport->inet->sk_state; + + if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) + return; + if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) + return; + xs_abort_connection(xprt, transport); +} + static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1629,7 +1699,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_data_ready = xs_tcp_data_ready; sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; - sk->sk_error_report = xs_tcp_error_report; + sk->sk_error_report = xs_error_report; sk->sk_allocation = GFP_ATOMIC; /* socket options */ @@ -1657,37 +1727,42 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) } /** - * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint - * @work: RPC transport to connect + * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint + * @xprt: RPC transport to connect + * @transport: socket transport to connect + * @create_sock: function to create a socket of the correct type * * Invoked by a work queue tasklet. */ -static void xs_tcp_connect_worker4(struct work_struct *work) +static void xs_tcp_setup_socket(struct rpc_xprt *xprt, + struct sock_xprt *transport, + struct socket *(*create_sock)(struct rpc_xprt *, + struct sock_xprt *)) { - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); - struct rpc_xprt *xprt = &transport->xprt; struct socket *sock = transport->sock; - int err, status = -EIO; + int status = -EIO; if (xprt->shutdown) goto out; if (!sock) { - /* start from scratch */ - if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport socket (%d).\n", -err); + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + sock = create_sock(xprt, transport); + if (IS_ERR(sock)) { + status = PTR_ERR(sock); goto out; } - xs_reclassify_socket4(sock); + } else { + int abort_and_exit; - if (xs_bind4(transport, sock) < 0) { - sock_release(sock); - goto out; - } - } else + abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, + &xprt->state); /* "close" the socket, preserving the local port */ - xs_tcp_reuse_connection(xprt); + xs_tcp_reuse_connection(xprt, transport); + + if (abort_and_exit) + goto out_eagain; + } dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); @@ -1696,83 +1771,104 @@ static void xs_tcp_connect_worker4(struct work_struct *work) dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); - if (status < 0) { - switch (status) { - case -EINPROGRESS: - case -EALREADY: - goto out_clear; - case -ECONNREFUSED: - case -ECONNRESET: - /* retry with existing socket, after a delay */ - break; - default: - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - } + switch (status) { + case -ECONNREFUSED: + case -ECONNRESET: + case -ENETUNREACH: + /* retry with existing socket, after a delay */ + case 0: + case -EINPROGRESS: + case -EALREADY: + xprt_clear_connecting(xprt); + return; } + /* get rid of existing socket, and retry */ + xs_tcp_shutdown(xprt); + printk("%s: connect returned unhandled error %d\n", + __func__, status); +out_eagain: + status = -EAGAIN; out: - xprt_wake_pending_tasks(xprt, status); -out_clear: xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); +} + +static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt, + struct sock_xprt *transport) +{ + struct socket *sock; + int err; + + /* start from scratch */ + err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) { + dprintk("RPC: can't create TCP transport socket (%d).\n", + -err); + goto out_err; + } + xs_reclassify_socket4(sock); + + if (xs_bind4(transport, sock) < 0) { + sock_release(sock); + goto out_err; + } + return sock; +out_err: + return ERR_PTR(-EIO); } /** - * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint + * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_tcp_connect_worker6(struct work_struct *work) +static void xs_tcp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; - struct socket *sock = transport->sock; - int err, status = -EIO; - if (xprt->shutdown) - goto out; + xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4); +} - if (!sock) { - /* start from scratch */ - if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport socket (%d).\n", -err); - goto out; - } - xs_reclassify_socket6(sock); +static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt, + struct sock_xprt *transport) +{ + struct socket *sock; + int err; + + /* start from scratch */ + err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) { + dprintk("RPC: can't create TCP transport socket (%d).\n", + -err); + goto out_err; + } + xs_reclassify_socket6(sock); - if (xs_bind6(transport, sock) < 0) { - sock_release(sock); - goto out; - } - } else - /* "close" the socket, preserving the local port */ - xs_tcp_reuse_connection(xprt); + if (xs_bind6(transport, sock) < 0) { + sock_release(sock); + goto out_err; + } + return sock; +out_err: + return ERR_PTR(-EIO); +} - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); +/** + * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_tcp_connect_worker6(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; - status = xs_tcp_finish_connecting(xprt, sock); - dprintk("RPC: %p connect status %d connected %d sock state %d\n", - xprt, -status, xprt_connected(xprt), sock->sk->sk_state); - if (status < 0) { - switch (status) { - case -EINPROGRESS: - case -EALREADY: - goto out_clear; - case -ECONNREFUSED: - case -ECONNRESET: - /* retry with existing socket, after a delay */ - break; - default: - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - } - } -out: - xprt_wake_pending_tasks(xprt, status); -out_clear: - xprt_clear_connecting(xprt); + xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6); } /** @@ -1817,9 +1913,6 @@ static void xs_tcp_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - /* Initiate graceful shutdown of the socket if not already done */ - if (test_bit(XPRT_CONNECTED, &xprt->state)) - xs_tcp_shutdown(xprt); /* Exit if we need to wait for socket shutdown to complete */ if (test_bit(XPRT_CLOSING, &xprt->state)) return; |