diff options
Diffstat (limited to 'drivers/platform/x86/intel/ifs/runtest.c')
-rw-r--r-- | drivers/platform/x86/intel/ifs/runtest.c | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c new file mode 100644 index 000000000000..b2ca2bb4501f --- /dev/null +++ b/drivers/platform/x86/intel/ifs/runtest.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. */ + +#include <linux/cpu.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include <linux/nmi.h> +#include <linux/slab.h> +#include <linux/stop_machine.h> + +#include "ifs.h" + +/* + * Note all code and data in this file is protected by + * ifs_sem. On HT systems all threads on a core will + * execute together, but only the first thread on the + * core will update results of the test. + */ + +#define CREATE_TRACE_POINTS +#include <trace/events/intel_ifs.h> + +/* Max retries on the same chunk */ +#define MAX_IFS_RETRIES 5 + +/* + * Number of TSC cycles that a logical CPU will wait for the other + * logical CPU on the core in the WRMSR(ACTIVATE_SCAN). + */ +#define IFS_THREAD_WAIT 100000 + +enum ifs_status_err_code { + IFS_NO_ERROR = 0, + IFS_OTHER_THREAD_COULD_NOT_JOIN = 1, + IFS_INTERRUPTED_BEFORE_RENDEZVOUS = 2, + IFS_POWER_MGMT_INADEQUATE_FOR_SCAN = 3, + IFS_INVALID_CHUNK_RANGE = 4, + IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS = 5, + IFS_CORE_NOT_CAPABLE_CURRENTLY = 6, + IFS_UNASSIGNED_ERROR_CODE = 7, + IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT = 8, + IFS_INTERRUPTED_DURING_EXECUTION = 9, +}; + +static const char * const scan_test_status[] = { + [IFS_NO_ERROR] = "SCAN no error", + [IFS_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.", + [IFS_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SCAN coordination.", + [IFS_POWER_MGMT_INADEQUATE_FOR_SCAN] = + "Core Abort SCAN Response due to power management condition.", + [IFS_INVALID_CHUNK_RANGE] = "Non valid chunks in the range", + [IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.", + [IFS_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SCAN currently", + [IFS_UNASSIGNED_ERROR_CODE] = "Unassigned error code 0x7", + [IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT] = + "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently", + [IFS_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SCAN start", +}; + +static void message_not_tested(struct device *dev, int cpu, union ifs_status status) +{ + if (status.error_code < ARRAY_SIZE(scan_test_status)) { + dev_info(dev, "CPU(s) %*pbl: SCAN operation did not start. %s\n", + cpumask_pr_args(cpu_smt_mask(cpu)), + scan_test_status[status.error_code]); + } else if (status.error_code == IFS_SW_TIMEOUT) { + dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n", + cpumask_pr_args(cpu_smt_mask(cpu))); + } else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) { + dev_info(dev, "CPU(s) %*pbl: %s\n", + cpumask_pr_args(cpu_smt_mask(cpu)), + "Not all scan chunks were executed. Maximum forward progress retries exceeded"); + } else { + dev_info(dev, "CPU(s) %*pbl: SCAN unknown status %llx\n", + cpumask_pr_args(cpu_smt_mask(cpu)), status.data); + } +} + +static void message_fail(struct device *dev, int cpu, union ifs_status status) +{ + /* + * control_error is set when the microcode runs into a problem + * loading the image from the reserved BIOS memory, or it has + * been corrupted. Reloading the image may fix this issue. + */ + if (status.control_error) { + dev_err(dev, "CPU(s) %*pbl: could not execute from loaded scan image\n", + cpumask_pr_args(cpu_smt_mask(cpu))); + } + + /* + * signature_error is set when the output from the scan chains does not + * match the expected signature. This might be a transient problem (e.g. + * due to a bit flip from an alpha particle or neutron). If the problem + * repeats on a subsequent test, then it indicates an actual problem in + * the core being tested. + */ + if (status.signature_error) { + dev_err(dev, "CPU(s) %*pbl: test signature incorrect.\n", + cpumask_pr_args(cpu_smt_mask(cpu))); + } +} + +static bool can_restart(union ifs_status status) +{ + enum ifs_status_err_code err_code = status.error_code; + + /* Signature for chunk is bad, or scan test failed */ + if (status.signature_error || status.control_error) + return false; + + switch (err_code) { + case IFS_NO_ERROR: + case IFS_OTHER_THREAD_COULD_NOT_JOIN: + case IFS_INTERRUPTED_BEFORE_RENDEZVOUS: + case IFS_POWER_MGMT_INADEQUATE_FOR_SCAN: + case IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT: + case IFS_INTERRUPTED_DURING_EXECUTION: + return true; + case IFS_INVALID_CHUNK_RANGE: + case IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS: + case IFS_CORE_NOT_CAPABLE_CURRENTLY: + case IFS_UNASSIGNED_ERROR_CODE: + break; + } + return false; +} + +/* + * Execute the scan. Called "simultaneously" on all threads of a core + * at high priority using the stop_cpus mechanism. + */ +static int doscan(void *data) +{ + int cpu = smp_processor_id(); + u64 *msrs = data; + int first; + + /* Only the first logical CPU on a core reports result */ + first = cpumask_first(cpu_smt_mask(cpu)); + + /* + * This WRMSR will wait for other HT threads to also write + * to this MSR (at most for activate.delay cycles). Then it + * starts scan of each requested chunk. The core scan happens + * during the "execution" of the WRMSR. This instruction can + * take up to 200 milliseconds (in the case where all chunks + * are processed in a single pass) before it retires. + */ + wrmsrl(MSR_ACTIVATE_SCAN, msrs[0]); + + if (cpu == first) { + /* Pass back the result of the scan */ + rdmsrl(MSR_SCAN_STATUS, msrs[1]); + } + + return 0; +} + +/* + * Use stop_core_cpuslocked() to synchronize writing to MSR_ACTIVATE_SCAN + * on all threads of the core to be tested. Loop if necessary to complete + * run of all chunks. Include some defensive tests to make sure forward + * progress is made, and that the whole test completes in a reasonable time. + */ +static void ifs_test_core(int cpu, struct device *dev) +{ + union ifs_scan activate; + union ifs_status status; + unsigned long timeout; + struct ifs_data *ifsd; + u64 msrvals[2]; + int retries; + + ifsd = ifs_get_data(dev); + + activate.rsvd = 0; + activate.delay = IFS_THREAD_WAIT; + activate.sigmce = 0; + activate.start = 0; + activate.stop = ifsd->valid_chunks - 1; + + timeout = jiffies + HZ / 2; + retries = MAX_IFS_RETRIES; + + while (activate.start <= activate.stop) { + if (time_after(jiffies, timeout)) { + status.error_code = IFS_SW_TIMEOUT; + break; + } + + msrvals[0] = activate.data; + stop_core_cpuslocked(cpu, doscan, msrvals); + + status.data = msrvals[1]; + + trace_ifs_status(cpu, activate, status); + + /* Some cases can be retried, give up for others */ + if (!can_restart(status)) + break; + + if (status.chunk_num == activate.start) { + /* Check for forward progress */ + if (--retries == 0) { + if (status.error_code == IFS_NO_ERROR) + status.error_code = IFS_SW_PARTIAL_COMPLETION; + break; + } + } else { + retries = MAX_IFS_RETRIES; + activate.start = status.chunk_num; + } + } + + /* Update status for this core */ + ifsd->scan_details = status.data; + + if (status.control_error || status.signature_error) { + ifsd->status = SCAN_TEST_FAIL; + message_fail(dev, cpu, status); + } else if (status.error_code) { + ifsd->status = SCAN_NOT_TESTED; + message_not_tested(dev, cpu, status); + } else { + ifsd->status = SCAN_TEST_PASS; + } +} + +/* + * Initiate per core test. It wakes up work queue threads on the target cpu and + * its sibling cpu. Once all sibling threads wake up, the scan test gets executed and + * wait for all sibling threads to finish the scan test. + */ +int do_core_test(int cpu, struct device *dev) +{ + int ret = 0; + + /* Prevent CPUs from being taken offline during the scan test */ + cpus_read_lock(); + + if (!cpu_online(cpu)) { + dev_info(dev, "cannot test on the offline cpu %d\n", cpu); + ret = -EINVAL; + goto out; + } + + ifs_test_core(cpu, dev); +out: + cpus_read_unlock(); + return ret; +} |