diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-25 09:19:23 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-25 09:19:23 -0800 |
commit | 7c3dc440b1f5c75f45e24430f913e561dc82a419 (patch) | |
tree | 7c3e037691d712a20f7e95279cc1e090328d5d44 /include | |
parent | d8e473182ab9e85708067be81d20424045d939fa (diff) | |
parent | e686c32590f40bffc45f105c04c836ffad3e531a (diff) |
Merge tag 'cxl-for-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull Compute Express Link (CXL) updates from Dan Williams:
"To date Linux has been dependent on platform-firmware to map CXL RAM
regions and handle events / errors from devices. With this update we
can now parse / update the CXL memory layout, and report events /
errors from devices. This is a precursor for the CXL subsystem to
handle the end-to-end "RAS" flow for CXL memory. i.e. the flow that
for DDR-attached-DRAM is handled by the EDAC driver where it maps
system physical address events to a field-replaceable-unit (FRU /
endpoint device). In general, CXL has the potential to standardize
what has historically been a pile of memory-controller-specific error
handling logic.
Another change of note is the default policy for handling RAM-backed
device-dax instances. Previously the default access mode was "device",
mmap(2) a device special file to access memory. The new default is
"kmem" where the address range is assigned to the core-mm via
add_memory_driver_managed(). This saves typical users from wondering
why their platform memory is not visible via free(1) and stuck behind
a device-file. At the same time it allows expert users to deploy
policy to, for example, get dedicated access to high performance
memory, or hide low performance memory from general purpose kernel
allocations. This affects not only CXL, but also systems with
high-bandwidth-memory that platform-firmware tags with the
EFI_MEMORY_SP (special purpose) designation.
Summary:
- CXL RAM region enumeration: instantiate 'struct cxl_region' objects
for platform firmware created memory regions
- CXL RAM region provisioning: complement the existing PMEM region
creation support with RAM region support
- "Soft Reservation" policy change: Online (memory hot-add)
soft-reserved memory (EFI_MEMORY_SP) by default, but still allow
for setting aside such memory for dedicated access via device-dax.
- CXL Events and Interrupts: Takeover CXL event handling from
platform-firmware (ACPI calls this CXL Memory Error Reporting) and
export CXL Events via Linux Trace Events.
- Convey CXL _OSC results to drivers: Similar to PCI, let the CXL
subsystem interrogate the result of CXL _OSC negotiation.
- Emulate CXL DVSEC Range Registers as "decoders": Allow for
first-generation devices that pre-date the definition of the CXL
HDM Decoder Capability to translate the CXL DVSEC Range Registers
into 'struct cxl_decoder' objects.
- Set timestamp: Per spec, set the device timestamp in case of
hotplug, or if platform-firwmare failed to set it.
- General fixups: linux-next build issues, non-urgent fixes for
pre-production hardware, unit test fixes, spelling and debug
message improvements"
* tag 'cxl-for-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (66 commits)
dax/kmem: Fix leak of memory-hotplug resources
cxl/mem: Add kdoc param for event log driver state
cxl/trace: Add serial number to trace points
cxl/trace: Add host output to trace points
cxl/trace: Standardize device information output
cxl/pci: Remove locked check for dvsec_range_allowed()
cxl/hdm: Add emulation when HDM decoders are not committed
cxl/hdm: Create emulated cxl_hdm for devices that do not have HDM decoders
cxl/hdm: Emulate HDM decoder from DVSEC range registers
cxl/pci: Refactor cxl_hdm_decode_init()
cxl/port: Export cxl_dvsec_rr_decode() to cxl_port
cxl/pci: Break out range register decoding from cxl_hdm_decode_init()
cxl: add RAS status unmasking for CXL
cxl: remove unnecessary calling of pci_enable_pcie_error_reporting()
dax/hmem: build hmem device support as module if possible
dax: cxl: add CXL_REGION dependency
cxl: avoid returning uninitialized error code
cxl/pmem: Fix nvdimm registration races
cxl/mem: Fix UAPI command comment
cxl/uapi: Tag commands from cxl_query_cmd()
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/dax.h | 7 | ||||
-rw-r--r-- | include/linux/libnvdimm.h | 3 | ||||
-rw-r--r-- | include/linux/memregion.h | 2 | ||||
-rw-r--r-- | include/linux/pci.h | 1 | ||||
-rw-r--r-- | include/linux/range.h | 5 | ||||
-rw-r--r-- | include/trace/events/cxl.h | 112 | ||||
-rw-r--r-- | include/uapi/linux/cxl_mem.h | 30 | ||||
-rw-r--r-- | include/uapi/linux/pci_regs.h | 1 |
8 files changed, 42 insertions, 119 deletions
diff --git a/include/linux/dax.h b/include/linux/dax.h index 2b5ecb591059..bf6258472e49 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -262,11 +262,14 @@ static inline bool dax_mapping(struct address_space *mapping) } #ifdef CONFIG_DEV_DAX_HMEM_DEVICES -void hmem_register_device(int target_nid, struct resource *r); +void hmem_register_resource(int target_nid, struct resource *r); #else -static inline void hmem_register_device(int target_nid, struct resource *r) +static inline void hmem_register_resource(int target_nid, struct resource *r) { } #endif +typedef int (*walk_hmem_fn)(struct device *dev, int target_nid, + const struct resource *res); +int walk_hmem_resources(struct device *dev, walk_hmem_fn fn); #endif diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index af38252ad704..e772aae71843 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -41,6 +41,9 @@ enum { */ NDD_INCOHERENT = 7, + /* dimm provider wants synchronous registration by __nvdimm_create() */ + NDD_REGISTER_SYNC = 8, + /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, ND_CMD_MAX_ELEM = 5, diff --git a/include/linux/memregion.h b/include/linux/memregion.h index bf83363807ac..c01321467789 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -3,10 +3,12 @@ #define _MEMREGION_H_ #include <linux/types.h> #include <linux/errno.h> +#include <linux/range.h> #include <linux/bug.h> struct memregion_info { int target_node; + struct range range; }; #ifdef CONFIG_MEMREGION diff --git a/include/linux/pci.h b/include/linux/pci.h index 5f6fc5358b3c..fafd8020c6d7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -579,6 +579,7 @@ struct pci_host_bridge { unsigned int native_pme:1; /* OS may use PCIe PME */ unsigned int native_ltr:1; /* OS may use PCIe LTR */ unsigned int native_dpc:1; /* OS may use PCIe DPC */ + unsigned int native_cxl_error:1; /* OS may use CXL RAS/Events */ unsigned int preserve_config:1; /* Preserve FW resource setup */ unsigned int size_windows:1; /* Enable root bus sizing */ unsigned int msi_domain:1; /* Bridge wants MSI domain */ diff --git a/include/linux/range.h b/include/linux/range.h index 274681cc3154..7efb6a9b069b 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -13,6 +13,11 @@ static inline u64 range_len(const struct range *range) return range->end - range->start + 1; } +static inline bool range_contains(struct range *r1, struct range *r2) +{ + return r1->start <= r2->start && r1->end >= r2->end; +} + int add_range(struct range *range, int az, int nr_range, u64 start, u64 end); diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h deleted file mode 100644 index ad085a2534ef..000000000000 --- a/include/trace/events/cxl.h +++ /dev/null @@ -1,112 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM cxl - -#if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ) -#define _CXL_EVENTS_H - -#include <linux/tracepoint.h> - -#define CXL_HEADERLOG_SIZE SZ_512 -#define CXL_HEADERLOG_SIZE_U32 SZ_512 / sizeof(u32) - -#define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0) -#define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1) -#define CXL_RAS_UC_CACHE_BE_PARITY BIT(2) -#define CXL_RAS_UC_CACHE_DATA_ECC BIT(3) -#define CXL_RAS_UC_MEM_DATA_PARITY BIT(4) -#define CXL_RAS_UC_MEM_ADDR_PARITY BIT(5) -#define CXL_RAS_UC_MEM_BE_PARITY BIT(6) -#define CXL_RAS_UC_MEM_DATA_ECC BIT(7) -#define CXL_RAS_UC_REINIT_THRESH BIT(8) -#define CXL_RAS_UC_RSVD_ENCODE BIT(9) -#define CXL_RAS_UC_POISON BIT(10) -#define CXL_RAS_UC_RECV_OVERFLOW BIT(11) -#define CXL_RAS_UC_INTERNAL_ERR BIT(14) -#define CXL_RAS_UC_IDE_TX_ERR BIT(15) -#define CXL_RAS_UC_IDE_RX_ERR BIT(16) - -#define show_uc_errs(status) __print_flags(status, " | ", \ - { CXL_RAS_UC_CACHE_DATA_PARITY, "Cache Data Parity Error" }, \ - { CXL_RAS_UC_CACHE_ADDR_PARITY, "Cache Address Parity Error" }, \ - { CXL_RAS_UC_CACHE_BE_PARITY, "Cache Byte Enable Parity Error" }, \ - { CXL_RAS_UC_CACHE_DATA_ECC, "Cache Data ECC Error" }, \ - { CXL_RAS_UC_MEM_DATA_PARITY, "Memory Data Parity Error" }, \ - { CXL_RAS_UC_MEM_ADDR_PARITY, "Memory Address Parity Error" }, \ - { CXL_RAS_UC_MEM_BE_PARITY, "Memory Byte Enable Parity Error" }, \ - { CXL_RAS_UC_MEM_DATA_ECC, "Memory Data ECC Error" }, \ - { CXL_RAS_UC_REINIT_THRESH, "REINIT Threshold Hit" }, \ - { CXL_RAS_UC_RSVD_ENCODE, "Received Unrecognized Encoding" }, \ - { CXL_RAS_UC_POISON, "Received Poison From Peer" }, \ - { CXL_RAS_UC_RECV_OVERFLOW, "Receiver Overflow" }, \ - { CXL_RAS_UC_INTERNAL_ERR, "Component Specific Error" }, \ - { CXL_RAS_UC_IDE_TX_ERR, "IDE Tx Error" }, \ - { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \ -) - -TRACE_EVENT(cxl_aer_uncorrectable_error, - TP_PROTO(const struct device *dev, u32 status, u32 fe, u32 *hl), - TP_ARGS(dev, status, fe, hl), - TP_STRUCT__entry( - __string(dev_name, dev_name(dev)) - __field(u32, status) - __field(u32, first_error) - __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) - ), - TP_fast_assign( - __assign_str(dev_name, dev_name(dev)); - __entry->status = status; - __entry->first_error = fe; - /* - * Embed the 512B headerlog data for user app retrieval and - * parsing, but no need to print this in the trace buffer. - */ - memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); - ), - TP_printk("%s: status: '%s' first_error: '%s'", - __get_str(dev_name), - show_uc_errs(__entry->status), - show_uc_errs(__entry->first_error) - ) -); - -#define CXL_RAS_CE_CACHE_DATA_ECC BIT(0) -#define CXL_RAS_CE_MEM_DATA_ECC BIT(1) -#define CXL_RAS_CE_CRC_THRESH BIT(2) -#define CLX_RAS_CE_RETRY_THRESH BIT(3) -#define CXL_RAS_CE_CACHE_POISON BIT(4) -#define CXL_RAS_CE_MEM_POISON BIT(5) -#define CXL_RAS_CE_PHYS_LAYER_ERR BIT(6) - -#define show_ce_errs(status) __print_flags(status, " | ", \ - { CXL_RAS_CE_CACHE_DATA_ECC, "Cache Data ECC Error" }, \ - { CXL_RAS_CE_MEM_DATA_ECC, "Memory Data ECC Error" }, \ - { CXL_RAS_CE_CRC_THRESH, "CRC Threshold Hit" }, \ - { CLX_RAS_CE_RETRY_THRESH, "Retry Threshold" }, \ - { CXL_RAS_CE_CACHE_POISON, "Received Cache Poison From Peer" }, \ - { CXL_RAS_CE_MEM_POISON, "Received Memory Poison From Peer" }, \ - { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \ -) - -TRACE_EVENT(cxl_aer_correctable_error, - TP_PROTO(const struct device *dev, u32 status), - TP_ARGS(dev, status), - TP_STRUCT__entry( - __string(dev_name, dev_name(dev)) - __field(u32, status) - ), - TP_fast_assign( - __assign_str(dev_name, dev_name(dev)); - __entry->status = status; - ), - TP_printk("%s: status: '%s'", - __get_str(dev_name), show_ce_errs(__entry->status) - ) -); - -#endif /* _CXL_EVENTS_H */ - -/* This part must be outside protection */ -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE cxl -#include <trace/define_trace.h> diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index c71021a2a9ed..86bbacf2a315 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -11,14 +11,19 @@ /** * DOC: UAPI * - * Not all of all commands that the driver supports are always available for use - * by userspace. Userspace must check the results from the QUERY command in - * order to determine the live set of commands. + * Not all of the commands that the driver supports are available for use by + * userspace at all times. Userspace can check the result of the QUERY command + * to determine the live set of commands. Alternatively, it can issue the + * command and check for failure. */ #define CXL_MEM_QUERY_COMMANDS _IOR(0xCE, 1, struct cxl_mem_query_commands) #define CXL_MEM_SEND_COMMAND _IOWR(0xCE, 2, struct cxl_send_command) +/* + * NOTE: New defines must be added to the end of the list to preserve + * compatibility because this enum is exported to user space. + */ #define CXL_CMDS \ ___C(INVALID, "Invalid Command"), \ ___C(IDENTIFY, "Identify Command"), \ @@ -68,6 +73,19 @@ static const struct { * struct cxl_command_info - Command information returned from a query. * @id: ID number for the command. * @flags: Flags that specify command behavior. + * + * CXL_MEM_COMMAND_FLAG_USER_ENABLED + * + * The given command id is supported by the driver and is supported by + * a related opcode on the device. + * + * CXL_MEM_COMMAND_FLAG_EXCLUSIVE + * + * Requests with the given command id will terminate with EBUSY as the + * kernel actively owns management of the given resource. For example, + * the label-storage-area can not be written while the kernel is + * actively managing that space. + * * @size_in: Expected input size, or ~0 if variable length. * @size_out: Expected output size, or ~0 if variable length. * @@ -77,7 +95,7 @@ static const struct { * bytes of output. * * - @id = 10 - * - @flags = 0 + * - @flags = CXL_MEM_COMMAND_FLAG_ENABLED * - @size_in = ~0 * - @size_out = 0 * @@ -87,7 +105,9 @@ struct cxl_command_info { __u32 id; __u32 flags; -#define CXL_MEM_COMMAND_FLAG_MASK GENMASK(0, 0) +#define CXL_MEM_COMMAND_FLAG_MASK GENMASK(1, 0) +#define CXL_MEM_COMMAND_FLAG_ENABLED BIT(0) +#define CXL_MEM_COMMAND_FLAG_EXCLUSIVE BIT(1) __u32 size_in; __u32 size_out; diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 85ab1278811e..dc2000e0fe3a 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -693,6 +693,7 @@ #define PCI_EXP_LNKCTL2_TX_MARGIN 0x0380 /* Transmit Margin */ #define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ #define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */ +#define PCI_EXP_LNKSTA2_FLIT 0x0400 /* Flit Mode Status */ #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ #define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */ #define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ |