Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--  arch/x86/kernel/cpu/aperfmperf.c              1
-rw-r--r--  arch/x86/kernel/cpu/bugs.c                   16
-rw-r--r--  arch/x86/kernel/cpu/common.c                  7
-rw-r--r--  arch/x86/kernel/cpu/cpu.h                     2
-rw-r--r--  arch/x86/kernel/cpu/intel.c                  25
-rw-r--r--  arch/x86/kernel/cpu/mce/core.c                7
-rw-r--r--  arch/x86/kernel/cpu/mce/inject.c              8
-rw-r--r--  arch/x86/kernel/cpu/resctrl/core.c          312
-rw-r--r--  arch/x86/kernel/cpu/resctrl/ctrlmondata.c    89
-rw-r--r--  arch/x86/kernel/cpu/resctrl/internal.h      108
-rw-r--r--  arch/x86/kernel/cpu/resctrl/monitor.c       253
-rw-r--r--  arch/x86/kernel/cpu/resctrl/pseudo_lock.c    42
-rw-r--r--  arch/x86/kernel/cpu/resctrl/rdtgroup.c      288
-rw-r--r--  arch/x86/kernel/cpu/scattered.c               1
-rw-r--r--  arch/x86/kernel/cpu/topology_amd.c            4
-rw-r--r--  arch/x86/kernel/cpu/vmware.c                225
16 files changed, 964 insertions, 424 deletions
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index f9a8c7b7943f..b3fa61d45352 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -345,6 +345,7 @@ static DECLARE_WORK(disable_freq_invariance_work,
disable_freq_invariance_workfn);
DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
+EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);
static void scale_freq_tick(u64 acnt, u64 mcnt)
{
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index b6f927f6c567..45675da354f3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1625,6 +1625,7 @@ static bool __init spec_ctrl_bhi_dis(void)
enum bhi_mitigations {
BHI_MITIGATION_OFF,
BHI_MITIGATION_ON,
+ BHI_MITIGATION_VMEXIT_ONLY,
};
static enum bhi_mitigations bhi_mitigation __ro_after_init =
@@ -1639,6 +1640,8 @@ static int __init spectre_bhi_parse_cmdline(char *str)
bhi_mitigation = BHI_MITIGATION_OFF;
else if (!strcmp(str, "on"))
bhi_mitigation = BHI_MITIGATION_ON;
+ else if (!strcmp(str, "vmexit"))
+ bhi_mitigation = BHI_MITIGATION_VMEXIT_ONLY;
else
pr_err("Ignoring unknown spectre_bhi option (%s)", str);
@@ -1659,19 +1662,22 @@ static void __init bhi_select_mitigation(void)
return;
}
+ /* Mitigate in hardware if supported */
if (spec_ctrl_bhi_dis())
return;
if (!IS_ENABLED(CONFIG_X86_64))
return;
- /* Mitigate KVM by default */
- setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
- pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
+ if (bhi_mitigation == BHI_MITIGATION_VMEXIT_ONLY) {
+ pr_info("Spectre BHI mitigation: SW BHB clearing on VM exit only\n");
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
+ return;
+ }
- /* Mitigate syscalls when the mitigation is forced =on */
+ pr_info("Spectre BHI mitigation: SW BHB clearing on syscall and VM exit\n");
setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
- pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
}
static void __init spectre_v2_select_mitigation(void)
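
For illustration only (not part of the patch): with this change the spectre_bhi= command line option accepts three values, "off", "on" and "vmexit". A minimal stand-alone C sketch of the same string-to-mode mapping, using only names taken from the hunk above; the handling of unknown strings is simplified here (the kernel logs an error and keeps its built-in default):

	#include <stdio.h>
	#include <string.h>

	/* Mirrors the enum added to bugs.c above. */
	enum bhi_mitigations {
		BHI_MITIGATION_OFF,
		BHI_MITIGATION_ON,
		BHI_MITIGATION_VMEXIT_ONLY,
	};

	/* Same string matching as spectre_bhi_parse_cmdline() in the hunk. */
	static enum bhi_mitigations parse_spectre_bhi(const char *str)
	{
		if (!strcmp(str, "off"))
			return BHI_MITIGATION_OFF;
		if (!strcmp(str, "vmexit"))
			return BHI_MITIGATION_VMEXIT_ONLY;
		/* "on" and (simplified) anything else map to the full mitigation. */
		return BHI_MITIGATION_ON;
	}

	int main(void)
	{
		printf("%d\n", parse_spectre_bhi("vmexit"));	/* prints 2 */
		return 0;
	}

Per bhi_select_mitigation() above, "vmexit" forces only X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT, while "on" forces both X86_FEATURE_CLEAR_BHB_LOOP and X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT when no hardware BHI control is available.
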
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2b170da84f97..d4e539d4e158 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1075,6 +1075,10 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
c->x86_virt_bits = (eax >> 8) & 0xff;
c->x86_phys_bits = eax & 0xff;
+
+ /* Provide a sane default if not enumerated: */
+ if (!c->x86_clflush_size)
+ c->x86_clflush_size = 32;
}
c->x86_cache_bits = c->x86_phys_bits;
@@ -1585,6 +1589,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
if (have_cpuid_p()) {
cpu_detect(c);
get_cpu_vendor(c);
+ intel_unlock_cpuid_leafs(c);
get_cpu_cap(c);
setup_force_cpu_cap(X86_FEATURE_CPUID);
get_cpu_address_sizes(c);
@@ -1744,7 +1749,7 @@ static void generic_identify(struct cpuinfo_x86 *c)
cpu_detect(c);
get_cpu_vendor(c);
-
+ intel_unlock_cpuid_leafs(c);
get_cpu_cap(c);
get_cpu_address_sizes(c);
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index ea9e07d57c8d..1beccefbaff9 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -61,9 +61,11 @@ extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state;
extern void __init tsx_init(void);
void tsx_ap_init(void);
+void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c);
#else
static inline void tsx_init(void) { }
static inline void tsx_ap_init(void) { }
+static inline void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) { }
#endif /* CONFIG_CPU_SUP_INTEL */
extern void init_spectral_chicken(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index a9ea0dba6f0c..08b95a35b5cb 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -218,19 +218,26 @@ static void detect_tme_early(struct cpuinfo_x86 *c)
keyid_bits);
}
+void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return;
+
+ if (c->x86 < 6 || (c->x86 == 6 && c->x86_model < 0xd))
+ return;
+
+ /*
+ * The BIOS can have limited CPUID to leaf 2, which breaks feature
+ * enumeration. Unlock it and update the maximum leaf info.
+ */
+ if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0)
+ c->cpuid_level = cpuid_eax(0);
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
- /* Unmask CPUID levels if masked: */
- if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
- if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
- MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) {
- c->cpuid_level = cpuid_eax(0);
- get_cpu_cap(c);
- }
- }
-
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
(c->x86 == 0x6 && c->x86_model >= 0x0e))
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index ad0623b659ed..b85ec7a4ec9e 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -677,10 +677,9 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
* is already totally confused. In this case it's likely it will
* not fully execute the machine check handler either.
*/
-bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
- bool error_seen = false;
struct mce m;
int i;
@@ -754,8 +753,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
continue;
log_it:
- error_seen = true;
-
if (flags & MCP_DONTLOG)
goto clear_it;
@@ -787,8 +784,6 @@ clear_it:
*/
sync_core();
-
- return error_seen;
}
EXPORT_SYMBOL_GPL(machine_check_poll);
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 4ade2a3ba312..49ed3428785d 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -487,12 +487,16 @@ static void prepare_msrs(void *info)
wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
}
- wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
+
+ if (m.misc)
+ wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
} else {
wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
- wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
+
+ if (m.misc)
+ wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
}
}
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index a113d9aba553..1930fce9dfe9 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -19,7 +19,6 @@
#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/err.h>
-#include <linux/cacheinfo.h>
#include <linux/cpuhotplug.h>
#include <asm/cpu_device_id.h>
@@ -60,7 +59,8 @@ static void mba_wrmsr_intel(struct msr_param *m);
static void cat_wrmsr(struct msr_param *m);
static void mba_wrmsr_amd(struct msr_param *m);
-#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains)
+#define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains)
+#define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains)
struct rdt_hw_resource rdt_resources_all[] = {
[RDT_RESOURCE_L3] =
@@ -68,8 +68,10 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_L3,
.name = "L3",
- .cache_level = 3,
- .domains = domain_init(RDT_RESOURCE_L3),
+ .ctrl_scope = RESCTRL_L3_CACHE,
+ .mon_scope = RESCTRL_L3_CACHE,
+ .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L3),
+ .mon_domains = mon_domain_init(RDT_RESOURCE_L3),
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
.fflags = RFTYPE_RES_CACHE,
@@ -82,8 +84,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_L2,
.name = "L2",
- .cache_level = 2,
- .domains = domain_init(RDT_RESOURCE_L2),
+ .ctrl_scope = RESCTRL_L2_CACHE,
+ .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2),
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
.fflags = RFTYPE_RES_CACHE,
@@ -96,8 +98,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_MBA,
.name = "MB",
- .cache_level = 3,
- .domains = domain_init(RDT_RESOURCE_MBA),
+ .ctrl_scope = RESCTRL_L3_CACHE,
+ .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA),
.parse_ctrlval = parse_bw,
.format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB,
@@ -108,8 +110,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_SMBA,
.name = "SMBA",
- .cache_level = 3,
- .domains = domain_init(RDT_RESOURCE_SMBA),
+ .ctrl_scope = RESCTRL_L3_CACHE,
+ .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA),
.parse_ctrlval = parse_bw,
.format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB,
@@ -306,8 +308,8 @@ static void rdt_get_cdp_l2_config(void)
static void mba_wrmsr_amd(struct msr_param *m)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;
for (i = m->low; i < m->high; i++)
@@ -330,8 +332,8 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
static void mba_wrmsr_intel(struct msr_param *m)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;
/* Write the delay values for mba. */
@@ -341,23 +343,38 @@ static void mba_wrmsr_intel(struct msr_param *m)
static void cat_wrmsr(struct msr_param *m)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;
for (i = m->low; i < m->high; i++)
wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}
-struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
+struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r)
{
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
lockdep_assert_cpus_held();
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
/* Find the domain that contains this CPU */
- if (cpumask_test_cpu(cpu, &d->cpu_mask))
+ if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
+ return d;
+ }
+
+ return NULL;
+}
+
+struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r)
+{
+ struct rdt_mon_domain *d;
+
+ lockdep_assert_cpus_held();
+
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ /* Find the domain that contains this CPU */
+ if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
return d;
}
@@ -379,24 +396,21 @@ void rdt_ctrl_update(void *arg)
}
/*
- * rdt_find_domain - Find a domain in a resource that matches input resource id
+ * rdt_find_domain - Search for a domain id in a resource domain list.
*
- * Search resource r's domain list to find the resource id. If the resource
- * id is found in a domain, return the domain. Otherwise, if requested by
- * caller, return the first domain whose id is bigger than the input id.
- * The domain list is sorted by id in ascending order.
+ * Search the domain list to find the domain id. If the domain id is
+ * found, return the domain. NULL otherwise. If the domain id is not
+ * found (and NULL returned) then the first domain with id bigger than
+ * the input id can be returned to the caller via @pos.
*/
-struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
- struct list_head **pos)
+struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id,
+ struct list_head **pos)
{
- struct rdt_domain *d;
+ struct rdt_domain_hdr *d;
struct list_head *l;
- if (id < 0)
- return ERR_PTR(-ENODEV);
-
- list_for_each(l, &r->domains) {
- d = list_entry(l, struct rdt_domain, list);
+ list_for_each(l, h) {
+ d = list_entry(l, struct rdt_domain_hdr, list);
/* When id is found, return its domain. */
if (id == d->id)
return d;
@@ -425,18 +439,23 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
*dc = r->default_ctrl;
}
-static void domain_free(struct rdt_hw_domain *hw_dom)
+static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
+{
+ kfree(hw_dom->ctrl_val);
+ kfree(hw_dom);
+}
+
+static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom)
{
kfree(hw_dom->arch_mbm_total);
kfree(hw_dom->arch_mbm_local);
- kfree(hw_dom->ctrl_val);
kfree(hw_dom);
}
-static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
+static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
struct msr_param m;
u32 *dc;
@@ -461,7 +480,7 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
* @num_rmid: The size of the MBM counter array
* @hw_dom: The domain that owns the allocated arrays
*/
-static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
+static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
{
size_t tsize;
@@ -484,37 +503,45 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
return 0;
}
-/*
- * domain_add_cpu - Add a cpu to a resource's domain list.
- *
- * If an existing domain in the resource r's domain list matches the cpu's
- * resource id, add the cpu in the domain.
- *
- * Otherwise, a new domain is allocated and inserted into the right position
- * in the domain list sorted by id in ascending order.
- *
- * The order in the domain list is visible to users when we print entries
- * in the schemata file and schemata input is validated to have the same order
- * as this list.
- */
-static void domain_add_cpu(int cpu, struct rdt_resource *r)
+static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
{
- int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
+ switch (scope) {
+ case RESCTRL_L2_CACHE:
+ case RESCTRL_L3_CACHE:
+ return get_cpu_cacheinfo_id(cpu, scope);
+ case RESCTRL_L3_NODE:
+ return cpu_to_node(cpu);
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
+static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
+{
+ int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
+ struct rdt_hw_ctrl_domain *hw_dom;
struct list_head *add_pos = NULL;
- struct rdt_hw_domain *hw_dom;
- struct rdt_domain *d;
+ struct rdt_domain_hdr *hdr;
+ struct rdt_ctrl_domain *d;
int err;
lockdep_assert_held(&domain_list_lock);
- d = rdt_find_domain(r, id, &add_pos);
- if (IS_ERR(d)) {
- pr_warn("Couldn't find cache id for CPU %d\n", cpu);
+ if (id < 0) {
+ pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->ctrl_scope, r->name);
return;
}
- if (d) {
- cpumask_set_cpu(cpu, &d->cpu_mask);
+ hdr = rdt_find_domain(&r->ctrl_domains, id, &add_pos);
+ if (hdr) {
+ if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
+ return;
+ d = container_of(hdr, struct rdt_ctrl_domain, hdr);
+
+ cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
if (r->cache.arch_has_per_cpu_cfg)
rdt_domain_reconfigure_cdp(r);
return;
@@ -525,64 +552,187 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
return;
d = &hw_dom->d_resctrl;
- d->id = id;
- cpumask_set_cpu(cpu, &d->cpu_mask);
+ d->hdr.id = id;
+ d->hdr.type = RESCTRL_CTRL_DOMAIN;
+ cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
rdt_domain_reconfigure_cdp(r);
- if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
- domain_free(hw_dom);
+ if (domain_setup_ctrlval(r, d)) {
+ ctrl_domain_free(hw_dom);
return;
}
- if (r->mon_capable && arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
- domain_free(hw_dom);
+ list_add_tail_rcu(&d->hdr.list, add_pos);
+
+ err = resctrl_online_ctrl_domain(r, d);
+ if (err) {
+ list_del_rcu(&d->hdr.list);
+ synchronize_rcu();
+ ctrl_domain_free(hw_dom);
+ }
+}
+
+static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
+{
+ int id = get_domain_id_from_scope(cpu, r->mon_scope);
+ struct list_head *add_pos = NULL;
+ struct rdt_hw_mon_domain *hw_dom;
+ struct rdt_domain_hdr *hdr;
+ struct rdt_mon_domain *d;
+ int err;
+
+ lockdep_assert_held(&domain_list_lock);
+
+ if (id < 0) {
+ pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->mon_scope, r->name);
return;
}
- list_add_tail_rcu(&d->list, add_pos);
+ hdr = rdt_find_domain(&r->mon_domains, id, &add_pos);
+ if (hdr) {
+ if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
+ return;
+ d = container_of(hdr, struct rdt_mon_domain, hdr);
+
+ cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
+ return;
+ }
- err = resctrl_online_domain(r, d);
+ hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
+ if (!hw_dom)
+ return;
+
+ d = &hw_dom->d_resctrl;
+ d->hdr.id = id;
+ d->hdr.type = RESCTRL_MON_DOMAIN;
+ d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
+ if (!d->ci) {
+ pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
+ mon_domain_free(hw_dom);
+ return;
+ }
+ cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
+
+ arch_mon_domain_online(r, d);
+
+ if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
+ mon_domain_free(hw_dom);
+ return;
+ }
+
+ list_add_tail_rcu(&d->hdr.list, add_pos);
+
+ err = resctrl_online_mon_domain(r, d);
if (err) {
- list_del_rcu(&d->list);
+ list_del_rcu(&d->hdr.list);
synchronize_rcu();
- domain_free(hw_dom);
+ mon_domain_free(hw_dom);
}
}
-static void domain_remove_cpu(int cpu, struct rdt_resource *r)
+static void domain_add_cpu(int cpu, struct rdt_resource *r)
+{
+ if (r->alloc_capable)
+ domain_add_cpu_ctrl(cpu, r);
+ if (r->mon_capable)
+ domain_add_cpu_mon(cpu, r);
+}
+
+static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
{
- int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
- struct rdt_hw_domain *hw_dom;
- struct rdt_domain *d;
+ int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
+ struct rdt_hw_ctrl_domain *hw_dom;
+ struct rdt_domain_hdr *hdr;
+ struct rdt_ctrl_domain *d;
lockdep_assert_held(&domain_list_lock);
- d = rdt_find_domain(r, id, NULL);
- if (IS_ERR_OR_NULL(d)) {
- pr_warn("Couldn't find cache id for CPU %d\n", cpu);
+ if (id < 0) {
+ pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->ctrl_scope, r->name);
+ return;
+ }
+
+ hdr = rdt_find_domain(&r->ctrl_domains, id, NULL);
+ if (!hdr) {
+ pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n",
+ id, cpu, r->name);
return;
}
- hw_dom = resctrl_to_arch_dom(d);
- cpumask_clear_cpu(cpu, &d->cpu_mask);
- if (cpumask_empty(&d->cpu_mask)) {
- resctrl_offline_domain(r, d);
- list_del_rcu(&d->list);
+ if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
+ return;
+
+ d = container_of(hdr, struct rdt_ctrl_domain, hdr);
+ hw_dom = resctrl_to_arch_ctrl_dom(d);
+
+ cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
+ if (cpumask_empty(&d->hdr.cpu_mask)) {
+ resctrl_offline_ctrl_domain(r, d);
+ list_del_rcu(&d->hdr.list);
synchronize_rcu();
/*
- * rdt_domain "d" is going to be freed below, so clear
+ * rdt_ctrl_domain "d" is going to be freed below, so clear
* its pointer from pseudo_lock_region struct.
*/
if (d->plr)
d->plr->d = NULL;
- domain_free(hw_dom);
+ ctrl_domain_free(hw_dom);
return;
}
}
+static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
+{
+ int id = get_domain_id_from_scope(cpu, r->mon_scope);
+ struct rdt_hw_mon_domain *hw_dom;
+ struct rdt_domain_hdr *hdr;
+ struct rdt_mon_domain *d;
+
+ lockdep_assert_held(&domain_list_lock);
+
+ if (id < 0) {
+ pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->mon_scope, r->name);
+ return;
+ }
+
+ hdr = rdt_find_domain(&r->mon_domains, id, NULL);
+ if (!hdr) {
+ pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n",
+ id, cpu, r->name);
+ return;
+ }
+
+ if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
+ return;
+
+ d = container_of(hdr, struct rdt_mon_domain, hdr);
+ hw_dom = resctrl_to_arch_mon_dom(d);
+
+ cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
+ if (cpumask_empty(&d->hdr.cpu_mask)) {
+ resctrl_offline_mon_domain(r, d);
+ list_del_rcu(&d->hdr.list);
+ synchronize_rcu();
+ mon_domain_free(hw_dom);
+
+ return;
+ }
+}
+
+static void domain_remove_cpu(int cpu, struct rdt_resource *r)
+{
+ if (r->alloc_capable)
+ domain_remove_cpu_ctrl(cpu, r);
+ if (r->mon_capable)
+ domain_remove_cpu_mon(cpu, r);
+}
+
static void clear_closid_rmid(int cpu)
{
struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
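
A stand-alone sketch (not from the patch) of the pattern this refactor relies on: control and monitor domains now embed a common struct rdt_domain_hdr, lookups such as rdt_find_domain() return only the header, and callers recover the outer structure with container_of() after checking hdr->type. Field sets here are heavily simplified; only id, type and the embedded header mirror the patch, everything else is made up:

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	enum resctrl_domain_type { RESCTRL_CTRL_DOMAIN, RESCTRL_MON_DOMAIN };

	/* Common part shared by both domain flavours, as in the patch. */
	struct rdt_domain_hdr {
		int id;
		enum resctrl_domain_type type;
	};

	/* Both domain types embed the header; other fields are illustrative. */
	struct rdt_ctrl_domain { struct rdt_domain_hdr hdr; unsigned int nr_closid; };
	struct rdt_mon_domain  { struct rdt_domain_hdr hdr; unsigned int nr_rmid;   };

	int main(void)
	{
		struct rdt_mon_domain mon = { .hdr = { .id = 3, .type = RESCTRL_MON_DOMAIN } };
		struct rdt_domain_hdr *hdr = &mon.hdr;	/* what a lookup would return */

		/* Verify the type before container_of(), as the patch does. */
		if (hdr->type == RESCTRL_MON_DOMAIN) {
			struct rdt_mon_domain *d = container_of(hdr, struct rdt_mon_domain, hdr);
			printf("monitor domain id %d\n", d->hdr.id);
		}
		return 0;
	}
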
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index b7291f60399c..50fa1fe9a073 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -60,7 +60,7 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
}
int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_domain *d)
+ struct rdt_ctrl_domain *d)
{
struct resctrl_staged_config *cfg;
u32 closid = data->rdtgrp->closid;
@@ -69,7 +69,7 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
cfg = &d->staged_config[s->conf_type];
if (cfg->have_new_ctrl) {
- rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
+ rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
return -EINVAL;
}
@@ -139,7 +139,7 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
* resource type.
*/
int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_domain *d)
+ struct rdt_ctrl_domain *d)
{
struct rdtgroup *rdtgrp = data->rdtgrp;
struct resctrl_staged_config *cfg;
@@ -148,7 +148,7 @@ int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
cfg = &d->staged_config[s->conf_type];
if (cfg->have_new_ctrl) {
- rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
+ rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
return -EINVAL;
}
@@ -208,8 +208,8 @@ static int parse_line(char *line, struct resctrl_schema *s,
struct resctrl_staged_config *cfg;
struct rdt_resource *r = s->res;
struct rdt_parse_data data;
+ struct rdt_ctrl_domain *d;
char *dom = NULL, *id;
- struct rdt_domain *d;
unsigned long dom_id;
/* Walking r->domains, ensure it can't race with cpuhp */
@@ -231,8 +231,8 @@ next:
return -EINVAL;
}
dom = strim(dom);
- list_for_each_entry(d, &r->domains, list) {
- if (d->id == dom_id) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ if (d->hdr.id == dom_id) {
data.buf = dom;
data.rdtgrp = rdtgrp;
if (r->parse_ctrlval(&data, s, d))
@@ -272,15 +272,15 @@ static u32 get_config_index(u32 closid, enum resctrl_conf_type type)
}
}
-int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
+int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type t, u32 cfg_val)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
u32 idx = get_config_index(closid, t);
struct msr_param msr_param;
- if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
+ if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask))
return -EINVAL;
hw_dom->ctrl_val[idx] = cfg_val;
@@ -297,17 +297,17 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
{
struct resctrl_staged_config *cfg;
- struct rdt_hw_domain *hw_dom;
+ struct rdt_hw_ctrl_domain *hw_dom;
struct msr_param msr_param;
+ struct rdt_ctrl_domain *d;
enum resctrl_conf_type t;
- struct rdt_domain *d;
u32 idx;
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
- list_for_each_entry(d, &r->domains, list) {
- hw_dom = resctrl_to_arch_dom(d);
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ hw_dom = resctrl_to_arch_ctrl_dom(d);
msr_param.res = NULL;
for (t = 0; t < CDP_NUM_TYPES; t++) {
cfg = &hw_dom->d_resctrl.staged_config[t];
@@ -330,7 +330,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
}
}
if (msr_param.res)
- smp_call_function_any(&d->cpu_mask, rdt_ctrl_update, &msr_param, 1);
+ smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
}
return 0;
@@ -430,10 +430,10 @@ out:
return ret ?: nbytes;
}
-u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
+u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type type)
{
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
u32 idx = get_config_index(closid, type);
return hw_dom->ctrl_val[idx];
@@ -442,7 +442,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid)
{
struct rdt_resource *r = schema->res;
- struct rdt_domain *dom;
+ struct rdt_ctrl_domain *dom;
bool sep = false;
u32 ctrl_val;
@@ -450,7 +450,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
lockdep_assert_cpus_held();
seq_printf(s, "%*s:", max_name_width, schema->name);
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
if (sep)
seq_puts(s, ";");
@@ -460,7 +460,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
ctrl_val = resctrl_arch_get_config(r, dom, closid,
schema->conf_type);
- seq_printf(s, r->format_str, dom->id, max_data_width,
+ seq_printf(s, r->format_str, dom->hdr.id, max_data_width,
ctrl_val);
sep = true;
}
@@ -489,7 +489,7 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
} else {
seq_printf(s, "%s:%d=%x\n",
rdtgrp->plr->s->res->name,
- rdtgrp->plr->d->id,
+ rdtgrp->plr->d->hdr.id,
rdtgrp->plr->cbm);
}
} else {
@@ -514,8 +514,8 @@ static int smp_mon_event_count(void *arg)
}
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
- struct rdt_domain *d, struct rdtgroup *rdtgrp,
- int evtid, int first)
+ struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
+ cpumask_t *cpumask, int evtid, int first)
{
int cpu;
@@ -529,7 +529,6 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
rr->evtid = evtid;
rr->r = r;
rr->d = d;
- rr->val = 0;
rr->first = first;
rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid);
if (IS_ERR(rr->arch_mon_ctx)) {
@@ -537,7 +536,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
return;
}
- cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU);
+ cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU);
/*
* cpumask_any_housekeeping() prefers housekeeping CPUs, but
@@ -546,7 +545,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
* counters on some platforms if its called in IRQ context.
*/
if (tick_nohz_full_cpu(cpu))
- smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1);
+ smp_call_function_any(cpumask, mon_event_count, rr, 1);
else
smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);
@@ -556,12 +555,13 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
int rdtgroup_mondata_show(struct seq_file *m, void *arg)
{
struct kernfs_open_file *of = m->private;
+ struct rdt_domain_hdr *hdr;
+ struct rmid_read rr = {0};
+ struct rdt_mon_domain *d;
u32 resid, evtid, domid;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
union mon_data_bits md;
- struct rdt_domain *d;
- struct rmid_read rr;
int ret = 0;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
@@ -574,15 +574,40 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
resid = md.u.rid;
domid = md.u.domid;
evtid = md.u.evtid;
-
r = &rdt_resources_all[resid].r_resctrl;
- d = rdt_find_domain(r, domid, NULL);
- if (IS_ERR_OR_NULL(d)) {
+
+ if (md.u.sum) {
+ /*
+ * This file requires summing across all domains that share
+ * the L3 cache id that was provided in the "domid" field of the
+ * mon_data_bits union. Search all domains in the resource for
+ * one that matches this cache id.
+ */
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ if (d->ci->id == domid) {
+ rr.ci = d->ci;
+ mon_event_read(&rr, r, NULL, rdtgrp,
+ &d->ci->shared_cpu_map, evtid, false);
+ goto checkresult;
+ }
+ }
ret = -ENOENT;
goto out;
+ } else {
+ /*
+ * This file provides data from a single domain. Search
+ * the resource to find the domain with "domid".
+ */
+ hdr = rdt_find_domain(&r->mon_domains, domid, NULL);
+ if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) {
+ ret = -ENOENT;
+ goto out;
+ }
+ d = container_of(hdr, struct rdt_mon_domain, hdr);
+ mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false);
}
- mon_event_read(&rr, r, d, rdtgrp, evtid, false);
+checkresult:
if (rr.err == -EIO)
seq_puts(m, "Error\n");
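
For illustration only (not part of the patch): when md.u.sum is set, the mon_data file above no longer reads a single domain; counts from every monitor domain whose d->ci->id matches the requested L3 cache id are added together (the per-domain reads happen in __mon_event_count(), in the monitor.c hunks later in this patch). A stand-alone sketch of that summing rule with made-up cache ids and counter values:

	#include <stdio.h>

	struct mon_domain { int l3_cache_id; unsigned long long count; };

	/* Sum event counts from all domains sharing one L3 cache id,
	 * mirroring the "sum" path added to the monitor read code. */
	static unsigned long long sum_for_cache(const struct mon_domain *doms,
						int n, int cache_id)
	{
		unsigned long long total = 0;

		for (int i = 0; i < n; i++)
			if (doms[i].l3_cache_id == cache_id)
				total += doms[i].count;
		return total;
	}

	int main(void)
	{
		/* Two SNC nodes (domains 0 and 1) share L3 cache id 0. */
		struct mon_domain doms[] = {
			{ .l3_cache_id = 0, .count = 1000 },
			{ .l3_cache_id = 0, .count = 2500 },
			{ .l3_cache_id = 1, .count = 4000 },
		};

		printf("%llu\n", sum_for_cache(doms, 3, 0));	/* prints 3500 */
		return 0;
	}
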
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index f1d926832ec8..955999aecfca 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -127,29 +127,54 @@ struct mon_evt {
};
/**
- * union mon_data_bits - Monitoring details for each event file
+ * union mon_data_bits - Monitoring details for each event file.
* @priv: Used to store monitoring event data in @u
- * as kernfs private data
- * @rid: Resource id associated with the event file
- * @evtid: Event id associated with the event file
- * @domid: The domain to which the event file belongs
- * @u: Name of the bit fields struct
+ * as kernfs private data.
+ * @u.rid: Resource id associated with the event file.
+ * @u.evtid: Event id associated with the event file.
+ * @u.sum: Set when event must be summed across multiple
+ * domains.
+ * @u.domid: When @u.sum is zero this is the domain to which
+ * the event file belongs. When @sum is one this
+ * is the id of the L3 cache that all domains to be
+ * summed share.
+ * @u: Name of the bit fields struct.
*/
union mon_data_bits {
void *priv;
struct {
unsigned int rid : 10;
- enum resctrl_event_id evtid : 8;
+ enum resctrl_event_id evtid : 7;
+ unsigned int sum : 1;
unsigned int domid : 14;
} u;
};
+/**
+ * struct rmid_read - Data passed across smp_call*() to read event count.
+ * @rgrp: Resource group for which the counter is being read. If it is a parent
+ * resource group then its event count is summed with the count from all
+ * its child resource groups.
+ * @r: Resource describing the properties of the event being read.
+ * @d: Domain that the counter should be read from. If NULL then sum all
+ * domains in @r sharing L3 @ci.id
+ * @evtid: Which monitor event to read.
+ * @first: Initialize MBM counter when true.
+ * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
+ * @err: Error encountered when reading counter.
+ * @val: Returned value of event counter. If @rgrp is a parent resource group,
+ * @val includes the sum of event counts from its child resource groups.
+ * If @d is NULL, @val includes the sum of all domains in @r sharing @ci.id,
+ * (summed across child resource groups if @rgrp is a parent resource group).
+ * @arch_mon_ctx: Hardware monitor allocated for this read request (MPAM only).
+ */
struct rmid_read {
struct rdtgroup *rgrp;
struct rdt_resource *r;
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;
enum resctrl_event_id evtid;
bool first;
+ struct cacheinfo *ci;
int err;
u64 val;
void *arch_mon_ctx;
@@ -232,7 +257,7 @@ struct mongroup {
*/
struct pseudo_lock_region {
struct resctrl_schema *s;
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
u32 cbm;
wait_queue_head_t lock_thread_wq;
int thread_done;
@@ -355,25 +380,41 @@ struct arch_mbm_state {
};
/**
- * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share
- * a resource
+ * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share
+ * a resource for a control function
* @d_resctrl: Properties exposed to the resctrl file system
* @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
+ *
+ * Members of this structure are accessed via helpers that provide abstraction.
+ */
+struct rdt_hw_ctrl_domain {
+ struct rdt_ctrl_domain d_resctrl;
+ u32 *ctrl_val;
+};
+
+/**
+ * struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share
+ * a resource for a monitor function
+ * @d_resctrl: Properties exposed to the resctrl file system
* @arch_mbm_total: arch private state for MBM total bandwidth
* @arch_mbm_local: arch private state for MBM local bandwidth
*
* Members of this structure are accessed via helpers that provide abstraction.
*/
-struct rdt_hw_domain {
- struct rdt_domain d_resctrl;
- u32 *ctrl_val;
+struct rdt_hw_mon_domain {
+ struct rdt_mon_domain d_resctrl;
struct arch_mbm_state *arch_mbm_total;
struct arch_mbm_state *arch_mbm_local;
};
-static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
+static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r)
{
- return container_of(r, struct rdt_hw_domain, d_resctrl);
+ return container_of(r, struct rdt_hw_ctrl_domain, d_resctrl);
+}
+
+static inline struct rdt_hw_mon_domain *resctrl_to_arch_mon_dom(struct rdt_mon_domain *r)
+{
+ return container_of(r, struct rdt_hw_mon_domain, d_resctrl);
}
/**
@@ -385,7 +426,7 @@ static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
*/
struct msr_param {
struct rdt_resource *res;
- struct rdt_domain *dom;
+ struct rdt_ctrl_domain *dom;
u32 low;
u32 high;
};
@@ -458,9 +499,9 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r
}
int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_domain *d);
+ struct rdt_ctrl_domain *d);
int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_domain *d);
+ struct rdt_ctrl_domain *d);
extern struct mutex rdtgroup_mutex;
@@ -493,6 +534,8 @@ static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)
int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
+void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d);
+
/*
* To return the common struct rdt_resource, which is contained in struct
* rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource.
@@ -558,27 +601,28 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn);
int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
umode_t mask);
-struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
- struct list_head **pos);
+struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id,
+ struct list_head **pos);
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off);
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v);
-bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
unsigned long cbm, int closid, bool exclusive);
-unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
+unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain *d,
unsigned long cbm);
enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
int rdtgroup_tasks_assigned(struct rdtgroup *r);
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
-bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm);
+bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d);
int rdt_pseudo_lock_init(void);
void rdt_pseudo_lock_release(void);
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
-struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
+struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r);
+struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r);
int closids_supported(void);
void closid_free(int closid);
int alloc_rmid(u32 closid);
@@ -589,19 +633,19 @@ bool __init rdt_cpu_has(int flag);
void mon_event_count(void *info);
int rdtgroup_mondata_show(struct seq_file *m, void *arg);
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
- struct rdt_domain *d, struct rdtgroup *rdtgrp,
- int evtid, int first);
-void mbm_setup_overflow_handler(struct rdt_domain *dom,
+ struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
+ cpumask_t *cpumask, int evtid, int first);
+void mbm_setup_overflow_handler(struct rdt_mon_domain *dom,
unsigned long delay_ms,
int exclude_cpu);
void mbm_handle_overflow(struct work_struct *work);
void __init intel_rdt_mbm_apply_quirk(void);
bool is_mba_sc(struct rdt_resource *r);
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
int exclude_cpu);
void cqm_handle_limbo(struct work_struct *work);
-bool has_busy_rmid(struct rdt_domain *d);
-void __check_limbo(struct rdt_domain *d, bool force_free);
+bool has_busy_rmid(struct rdt_mon_domain *d);
+void __check_limbo(struct rdt_mon_domain *d, bool force_free);
void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
void __init thread_throttle_mode_init(void);
void __init mbm_config_rftype_init(const char *config);
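
For illustration only (not part of the patch): the kernfs private pointer of each mon_data file packs its describing fields into union mon_data_bits; this patch narrows evtid to 7 bits and spends the freed bit on the new sum flag. A stand-alone sketch using the same bit widths as the hunk above (the sample values are arbitrary):

	#include <stdio.h>

	/* Same bit widths as union mon_data_bits after this patch:
	 * rid:10, evtid:7, sum:1, domid:14 -- 32 bits in total. */
	union mon_data_bits {
		void *priv;
		struct {
			unsigned int rid   : 10;
			unsigned int evtid : 7;
			unsigned int sum   : 1;
			unsigned int domid : 14;
		} u;
	};

	int main(void)
	{
		union mon_data_bits md = { .priv = NULL };

		md.u.rid   = 1;	/* resource id (arbitrary sample value) */
		md.u.evtid = 2;	/* monitor event id (arbitrary sample value) */
		md.u.sum   = 1;	/* sum across SNC domains sharing an L3 */
		md.u.domid = 5;	/* L3 cache id when sum == 1 */

		printf("rid=%u evtid=%u sum=%u domid=%u\n",
		       md.u.rid, md.u.evtid, md.u.sum, md.u.domid);
		return 0;
	}
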
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 2345e6836593..851b561850e0 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -15,6 +15,8 @@
* Software Developer Manual June 2016, volume 3, section 17.17.
*/
+#define pr_fmt(fmt) "resctrl: " fmt
+
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/sizes.h>
@@ -97,6 +99,8 @@ unsigned int resctrl_rmid_realloc_limit;
#define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5))
+static int snc_nodes_per_l3_cache = 1;
+
/*
* The correction factor table is documented in Documentation/arch/x86/resctrl.rst.
* If rmid > rmid threshold, MBM total and local values should be multiplied
@@ -185,7 +189,43 @@ static inline struct rmid_entry *__rmid_entry(u32 idx)
return entry;
}
-static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
+/*
+ * When Sub-NUMA Cluster (SNC) mode is not enabled (as indicated by
+ * "snc_nodes_per_l3_cache == 1") no translation of the RMID value is
+ * needed. The physical RMID is the same as the logical RMID.
+ *
+ * On a platform with SNC mode enabled, Linux enables RMID sharing mode
+ * via MSR 0xCA0 (see the "RMID Sharing Mode" section in the "Intel
+ * Resource Director Technology Architecture Specification" for a full
+ * description of RMID sharing mode).
+ *
+ * In RMID sharing mode there are fewer "logical RMID" values available
+ * to accumulate data ("physical RMIDs" are divided evenly between SNC
+ * nodes that share an L3 cache). Linux creates an rdt_mon_domain for
+ * each SNC node.
+ *
+ * The value loaded into IA32_PQR_ASSOC is the "logical RMID".
+ *
+ * Data is collected independently on each SNC node and can be retrieved
+ * using the "physical RMID" value computed by this function and loaded
+ * into IA32_QM_EVTSEL. @cpu can be any CPU in the SNC node.
+ *
+ * The scope of the IA32_QM_EVTSEL and IA32_QM_CTR MSRs is at the L3
+ * cache. So a "physical RMID" may be read from any CPU that shares
+ * the L3 cache with the desired SNC node, not just from a CPU in
+ * the specific SNC node.
+ */
+static int logical_rmid_to_physical_rmid(int cpu, int lrmid)
+{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+
+ if (snc_nodes_per_l3_cache == 1)
+ return lrmid;
+
+ return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->num_rmid;
+}
+
+static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val)
{
u64 msr_val;
@@ -197,7 +237,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
* IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
* are error bits.
*/
- wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+ wrmsr(MSR_IA32_QM_EVTSEL, eventid, prmid);
rdmsrl(MSR_IA32_QM_CTR, msr_val);
if (msr_val & RMID_VAL_ERROR)
@@ -209,7 +249,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
return 0;
}
-static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
+static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom,
u32 rmid,
enum resctrl_event_id eventid)
{
@@ -228,19 +268,22 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
return NULL;
}
-void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
+void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 unused, u32 rmid,
enum resctrl_event_id eventid)
{
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
+ int cpu = cpumask_any(&d->hdr.cpu_mask);
struct arch_mbm_state *am;
+ u32 prmid;
am = get_arch_mbm_state(hw_dom, rmid, eventid);
if (am) {
memset(am, 0, sizeof(*am));
+ prmid = logical_rmid_to_physical_rmid(cpu, rmid);
/* Record any initial, non-zero count value. */
- __rmid_read(rmid, eventid, &am->prev_msr);
+ __rmid_read_phys(prmid, eventid, &am->prev_msr);
}
}
@@ -248,9 +291,9 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
* Assumes that hardware counters are also reset and thus that there is
* no need to record initial non-zero counts.
*/
-void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d)
+void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d)
{
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
if (is_mbm_total_enabled())
memset(hw_dom->arch_mbm_total, 0,
@@ -269,22 +312,22 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
return chunks >> shift;
}
-int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
+int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 unused, u32 rmid, enum resctrl_event_id eventid,
u64 *val, void *ignored)
{
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ int cpu = cpumask_any(&d->hdr.cpu_mask);
struct arch_mbm_state *am;
u64 msr_val, chunks;
+ u32 prmid;
int ret;
resctrl_arch_rmid_read_context_check();
- if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
- return -EINVAL;
-
- ret = __rmid_read(rmid, eventid, &msr_val);
+ prmid = logical_rmid_to_physical_rmid(cpu, rmid);
+ ret = __rmid_read_phys(prmid, eventid, &msr_val);
if (ret)
return ret;
@@ -320,7 +363,7 @@ static void limbo_release_entry(struct rmid_entry *entry)
* decrement the count. If the busy count gets to zero on an RMID, we
* free the RMID
*/
-void __check_limbo(struct rdt_domain *d, bool force_free)
+void __check_limbo(struct rdt_mon_domain *d, bool force_free)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
@@ -364,7 +407,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
* CLOSID and RMID because there may be dependencies between them
* on some architectures.
*/
- trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->id, val);
+ trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->hdr.id, val);
}
if (force_free || !rmid_dirty) {
@@ -378,7 +421,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
}
-bool has_busy_rmid(struct rdt_domain *d)
+bool has_busy_rmid(struct rdt_mon_domain *d)
{
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
@@ -479,7 +522,7 @@ int alloc_rmid(u32 closid)
static void add_rmid_to_limbo(struct rmid_entry *entry)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;
u32 idx;
lockdep_assert_held(&rdtgroup_mutex);
@@ -490,7 +533,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
entry->busy = 0;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
/*
* For the first limbo RMID in the domain,
* setup up the limbo worker.
@@ -519,7 +562,8 @@ void free_rmid(u32 closid, u32 rmid)
* allows architectures that ignore the closid parameter to avoid an
* unnecessary check.
*/
- if (idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
+ if (!resctrl_arch_mon_capable() ||
+ idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
RESCTRL_RESERVED_RMID))
return;
@@ -531,7 +575,7 @@ void free_rmid(u32 closid, u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
}
-static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid,
+static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
u32 rmid, enum resctrl_event_id evtid)
{
u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
@@ -548,7 +592,10 @@ static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid,
static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
+ int cpu = smp_processor_id();
+ struct rdt_mon_domain *d;
struct mbm_state *m;
+ int err, ret;
u64 tval = 0;
if (rr->first) {
@@ -559,14 +606,47 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
return 0;
}
- rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->evtid,
- &tval, rr->arch_mon_ctx);
- if (rr->err)
- return rr->err;
+ if (rr->d) {
+ /* Reading a single domain, must be on a CPU in that domain. */
+ if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
+ return -EINVAL;
+ rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
+ rr->evtid, &tval, rr->arch_mon_ctx);
+ if (rr->err)
+ return rr->err;
- rr->val += tval;
+ rr->val += tval;
- return 0;
+ return 0;
+ }
+
+ /* Summing domains that share a cache, must be on a CPU for that cache. */
+ if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
+ return -EINVAL;
+
+ /*
+ * Legacy files must report the sum of an event across all
+ * domains that share the same L3 cache instance.
+ * Report success if a read from any domain succeeds, -EINVAL
+ * (translated to "Unavailable" for user space) if reading from
+ * all domains fail for any reason.
+ */
+ ret = -EINVAL;
+ list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
+ if (d->ci->id != rr->ci->id)
+ continue;
+ err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
+ rr->evtid, &tval, rr->arch_mon_ctx);
+ if (!err) {
+ rr->val += tval;
+ ret = 0;
+ }
+ }
+
+ if (ret)
+ rr->err = ret;
+
+ return ret;
}
/*
@@ -667,12 +747,12 @@ void mon_event_count(void *info)
* throttle MSRs already have low percentage values. To avoid
* unnecessarily restricting such rdtgroups, we also increase the bandwidth.
*/
-static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
+static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
{
u32 closid, rmid, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
+ struct rdt_ctrl_domain *dom_mba;
struct rdt_resource *r_mba;
- struct rdt_domain *dom_mba;
u32 cur_bw, user_bw, idx;
struct list_head *head;
struct rdtgroup *entry;
@@ -687,7 +767,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
idx = resctrl_arch_rmid_idx_encode(closid, rmid);
pmbm_data = &dom_mbm->mbm_local[idx];
- dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
+ dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
if (!dom_mba) {
pr_warn_once("Failure to get domain for MBA update\n");
return;
@@ -733,12 +813,11 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
}
-static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
+static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 closid, u32 rmid)
{
- struct rmid_read rr;
+ struct rmid_read rr = {0};
- rr.first = false;
rr.r = r;
rr.d = d;
@@ -791,17 +870,17 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
void cqm_handle_limbo(struct work_struct *work)
{
unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
- d = container_of(work, struct rdt_domain, cqm_limbo.work);
+ d = container_of(work, struct rdt_mon_domain, cqm_limbo.work);
__check_limbo(d, false);
if (has_busy_rmid(d)) {
- d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+ d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
RESCTRL_PICK_ANY_CPU);
schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
delay);
@@ -819,13 +898,13 @@ void cqm_handle_limbo(struct work_struct *work)
* @exclude_cpu: Which CPU the handler should not run on,
* RESCTRL_PICK_ANY_CPU to pick any CPU.
*/
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
int exclude_cpu)
{
unsigned long delay = msecs_to_jiffies(delay_ms);
int cpu;
- cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
+ cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
dom->cqm_work_cpu = cpu;
if (cpu < nr_cpu_ids)
@@ -836,9 +915,9 @@ void mbm_handle_overflow(struct work_struct *work)
{
unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
struct rdtgroup *prgrp, *crgrp;
+ struct rdt_mon_domain *d;
struct list_head *head;
struct rdt_resource *r;
- struct rdt_domain *d;
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
@@ -851,7 +930,7 @@ void mbm_handle_overflow(struct work_struct *work)
goto out_unlock;
r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
- d = container_of(work, struct rdt_domain, mbm_over.work);
+ d = container_of(work, struct rdt_mon_domain, mbm_over.work);
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
mbm_update(r, d, prgrp->closid, prgrp->mon.rmid);
@@ -868,7 +947,7 @@ void mbm_handle_overflow(struct work_struct *work)
* Re-check for housekeeping CPUs. This allows the overflow handler to
* move off a nohz_full CPU quickly.
*/
- d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+ d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
RESCTRL_PICK_ANY_CPU);
schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
@@ -885,7 +964,7 @@ out_unlock:
* @exclude_cpu: Which CPU the handler should not run on,
* RESCTRL_PICK_ANY_CPU to pick any CPU.
*/
-void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
+void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
int exclude_cpu)
{
unsigned long delay = msecs_to_jiffies(delay_ms);
@@ -897,7 +976,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
*/
if (!resctrl_mounted || !resctrl_arch_mon_capable())
return;
- cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
+ cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
dom->mbm_work_cpu = cpu;
if (cpu < nr_cpu_ids)
@@ -1014,6 +1093,88 @@ static void l3_mon_evt_init(struct rdt_resource *r)
list_add_tail(&mbm_local_event.list, &r->evt_list);
}
+/*
+ * The power-on reset value of MSR_RMID_SNC_CONFIG is 0x1
+ * which indicates that RMIDs are configured in legacy mode.
+ * This mode is incompatible with Linux resctrl semantics
+ * as RMIDs are partitioned between SNC nodes, which requires
+ * a user to know which RMID is allocated to a task.
+ * Clearing bit 0 reconfigures the RMID counters for use
+ * in RMID sharing mode. This mode is better for Linux.
+ * The RMID space is divided between all SNC nodes with the
+ * RMIDs renumbered to start from zero in each node when
+ * counting operations from tasks. Code to read the counters
+ * must adjust RMID counter numbers based on SNC node. See
+ * logical_rmid_to_physical_rmid() for code that does this.
+ */
+void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ if (snc_nodes_per_l3_cache > 1)
+ msr_clear_bit(MSR_RMID_SNC_CONFIG, 0);
+}
+
+/* CPU models that support MSR_RMID_SNC_CONFIG */
+static const struct x86_cpu_id snc_cpu_ids[] __initconst = {
+ X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
+ X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0),
+ X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0),
+ X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0),
+ X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0),
+ {}
+};
+
+/*
+ * There isn't a simple hardware bit that indicates whether a CPU is running
+ * in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the
+ * number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in
+ * the same NUMA node as CPU0.
+ * It is not possible to accurately determine SNC state if the system is
+ * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes
+ * to L3 caches. It will be OK if system is booted with hyperthreading
+ * disabled (since this doesn't affect the ratio).
+ */
+static __init int snc_get_config(void)
+{
+ struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE);
+ const cpumask_t *node0_cpumask;
+ int cpus_per_node, cpus_per_l3;
+ int ret;
+
+ if (!x86_match_cpu(snc_cpu_ids) || !ci)
+ return 1;
+
+ cpus_read_lock();
+ if (num_online_cpus() != num_present_cpus())
+ pr_warn("Some CPUs offline, SNC detection may be incorrect\n");
+ cpus_read_unlock();
+
+ node0_cpumask = cpumask_of_node(cpu_to_node(0));
+
+ cpus_per_node = cpumask_weight(node0_cpumask);
+ cpus_per_l3 = cpumask_weight(&ci->shared_cpu_map);
+
+ if (!cpus_per_node || !cpus_per_l3)
+ return 1;
+
+ ret = cpus_per_l3 / cpus_per_node;
+
+ /* sanity check: Only valid results are 1, 2, 3, 4 */
+ switch (ret) {
+ case 1:
+ break;
+ case 2 ... 4:
+ pr_info("Sub-NUMA Cluster mode detected with %d nodes per L3 cache\n", ret);
+ rdt_resources_all[RDT_RESOURCE_L3].r_resctrl.mon_scope = RESCTRL_L3_NODE;
+ break;
+ default:
+ pr_warn("Ignore improbable SNC node count %d\n", ret);
+ ret = 1;
+ break;
+ }
+
+ return ret;
+}
+
int __init rdt_get_mon_l3_config(struct rdt_resource *r)
{
unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
@@ -1021,9 +1182,11 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
unsigned int threshold;
int ret;
+ snc_nodes_per_l3_cache = snc_get_config();
+
resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024;
- hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale;
- r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
+ hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache;
+ r->num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache;
hw_res->mbm_width = MBM_CNTR_WIDTH_BASE;
if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
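
A stand-alone sketch (not from the patch) of the arithmetic in logical_rmid_to_physical_rmid() above: in RMID sharing mode each SNC node gets num_rmid logical RMIDs, and the physical RMID is offset by the node's position within its L3 cache. The sizes below are made up (two SNC nodes per L3, 128 RMIDs per node):

	#include <stdio.h>

	/* Mirrors logical_rmid_to_physical_rmid(): with SNC enabled, the
	 * physical RMID is the logical RMID plus a per-node block offset. */
	static int lrmid_to_prmid(int node_in_l3, int lrmid,
				  int snc_nodes_per_l3, int num_rmid)
	{
		if (snc_nodes_per_l3 == 1)
			return lrmid;	/* no SNC: logical == physical */

		return lrmid + (node_in_l3 % snc_nodes_per_l3) * num_rmid;
	}

	int main(void)
	{
		/* Hypothetical: 2 SNC nodes per L3 cache, 128 RMIDs per node. */
		printf("%d\n", lrmid_to_prmid(0, 5, 2, 128));	/* prints 5   */
		printf("%d\n", lrmid_to_prmid(1, 5, 2, 128));	/* prints 133 */
		return 0;
	}

Consistent with this partitioning, rdt_get_mon_l3_config() above divides both the per-domain RMID count and the occupancy scale factor by snc_nodes_per_l3_cache.
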
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index aacf236dfe3b..e69489d48625 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -11,7 +11,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/debugfs.h>
@@ -221,7 +220,7 @@ static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
int cpu;
int ret;
- for_each_cpu(cpu, &plr->d->cpu_mask) {
+ for_each_cpu(cpu, &plr->d->hdr.cpu_mask) {
pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
if (!pm_req) {
rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n");
@@ -292,12 +291,15 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
*/
static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
{
- struct cpu_cacheinfo *ci;
+ enum resctrl_scope scope = plr->s->res->ctrl_scope;
+ struct cacheinfo *ci;
int ret;
- int i;
+
+ if (WARN_ON_ONCE(scope != RESCTRL_L2_CACHE && scope != RESCTRL_L3_CACHE))
+ return -ENODEV;
/* Pick the first cpu we find that is associated with the cache. */
- plr->cpu = cpumask_first(&plr->d->cpu_mask);
+ plr->cpu = cpumask_first(&plr->d->hdr.cpu_mask);
if (!cpu_online(plr->cpu)) {
rdt_last_cmd_printf("CPU %u associated with cache not online\n",
@@ -306,15 +308,11 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
goto out_region;
}
- ci = get_cpu_cacheinfo(plr->cpu);
-
- plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
-
- for (i = 0; i < ci->num_leaves; i++) {
- if (ci->info_list[i].level == plr->s->res->cache_level) {
- plr->line_size = ci->info_list[i].coherency_line_size;
- return 0;
- }
+ ci = get_cpu_cacheinfo_level(plr->cpu, scope);
+ if (ci) {
+ plr->line_size = ci->coherency_line_size;
+ plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
+ return 0;
}
ret = -1;
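The hunk above replaces the open-coded walk over ci->info_list with get_cpu_cacheinfo_level(), which is introduced elsewhere in this series and is not part of this file's diff. As a rough sketch (an assumption about its shape, not the actual implementation), such a helper would return the cacheinfo leaf of the requested level for a CPU, much like the loop that was removed:

/* Illustrative only; the real helper lives in the cacheinfo core. */
static struct cacheinfo *get_cpu_cacheinfo_level_sketch(int cpu, unsigned int level)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
	int i;

	for (i = 0; i < ci->num_leaves; i++) {
		if (ci->info_list[i].level == level)
			return &ci->info_list[i];
	}

	return NULL;	/* no cache enumerated at this level */
}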
@@ -810,7 +808,7 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
* Return: true if @cbm overlaps with pseudo-locked region on @d, false
* otherwise.
*/
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm)
{
unsigned int cbm_len;
unsigned long cbm_b;
@@ -837,11 +835,11 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm
* if it is not possible to test due to memory allocation issue,
* false otherwise.
*/
-bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
+bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d)
{
+ struct rdt_ctrl_domain *d_i;
cpumask_var_t cpu_with_psl;
struct rdt_resource *r;
- struct rdt_domain *d_i;
bool ret = false;
/* Walking r->domains, ensure it can't race with cpuhp */
@@ -855,10 +853,10 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
* associated with them.
*/
for_each_alloc_capable_rdt_resource(r) {
- list_for_each_entry(d_i, &r->domains, list) {
+ list_for_each_entry(d_i, &r->ctrl_domains, hdr.list) {
if (d_i->plr)
cpumask_or(cpu_with_psl, cpu_with_psl,
- &d_i->cpu_mask);
+ &d_i->hdr.cpu_mask);
}
}
@@ -866,7 +864,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
* Next test if new pseudo-locked region would intersect with
* existing region.
*/
- if (cpumask_intersects(&d->cpu_mask, cpu_with_psl))
+ if (cpumask_intersects(&d->hdr.cpu_mask, cpu_with_psl))
ret = true;
free_cpumask_var(cpu_with_psl);
@@ -1198,7 +1196,7 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
}
plr->thread_done = 0;
- cpu = cpumask_first(&plr->d->cpu_mask);
+ cpu = cpumask_first(&plr->d->hdr.cpu_mask);
if (!cpu_online(cpu)) {
ret = -ENODEV;
goto out;
@@ -1528,7 +1526,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
* may be scheduled elsewhere and invalidate entries in the
* pseudo-locked region.
*/
- if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
+ if (!cpumask_subset(current->cpus_ptr, &plr->d->hdr.cpu_mask)) {
mutex_unlock(&rdtgroup_mutex);
return -EINVAL;
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 02f213f1c51c..d7163b764c62 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -12,7 +12,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
@@ -92,13 +91,13 @@ void rdt_last_cmd_printf(const char *fmt, ...)
void rdt_staged_configs_clear(void)
{
+ struct rdt_ctrl_domain *dom;
struct rdt_resource *r;
- struct rdt_domain *dom;
lockdep_assert_held(&rdtgroup_mutex);
for_each_alloc_capable_rdt_resource(r) {
- list_for_each_entry(dom, &r->domains, list)
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list)
memset(dom->staged_config, 0, sizeof(dom->staged_config));
}
}
@@ -317,7 +316,7 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
rdt_last_cmd_puts("Cache domain offline\n");
ret = -ENODEV;
} else {
- mask = &rdtgrp->plr->d->cpu_mask;
+ mask = &rdtgrp->plr->d->hdr.cpu_mask;
seq_printf(s, is_cpu_list(of) ?
"%*pbl\n" : "%*pb\n",
cpumask_pr_args(mask));
@@ -1012,7 +1011,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
unsigned long sw_shareable = 0, hw_shareable = 0;
unsigned long exclusive = 0, pseudo_locked = 0;
struct rdt_resource *r = s->res;
- struct rdt_domain *dom;
+ struct rdt_ctrl_domain *dom;
int i, hwb, swb, excl, psl;
enum rdtgrp_mode mode;
bool sep = false;
@@ -1021,12 +1020,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
hw_shareable = r->cache.shareable_bits;
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
if (sep)
seq_putc(seq, ';');
sw_shareable = 0;
exclusive = 0;
- seq_printf(seq, "%d=", dom->id);
+ seq_printf(seq, "%d=", dom->hdr.id);
for (i = 0; i < closids_supported(); i++) {
if (!closid_allocated(i))
continue;
@@ -1243,7 +1242,7 @@ static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
*
* Return: false if CBM does not overlap, true if it does.
*/
-static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d,
unsigned long cbm, int closid,
enum resctrl_conf_type type, bool exclusive)
{
@@ -1298,7 +1297,7 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d
*
* Return: true if CBM overlap detected, false if there is no overlap
*/
-bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
unsigned long cbm, int closid, bool exclusive)
{
enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
@@ -1329,10 +1328,10 @@ bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
{
int closid = rdtgrp->closid;
+ struct rdt_ctrl_domain *d;
struct resctrl_schema *s;
struct rdt_resource *r;
bool has_cache = false;
- struct rdt_domain *d;
u32 ctrl;
/* Walking r->domains, ensure it can't race with cpuhp */
@@ -1343,7 +1342,7 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
continue;
has_cache = true;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
ctrl = resctrl_arch_get_config(r, d, closid,
s->conf_type);
if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
@@ -1448,20 +1447,19 @@ out:
* bitmap functions work correctly.
*/
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
- struct rdt_domain *d, unsigned long cbm)
+ struct rdt_ctrl_domain *d, unsigned long cbm)
{
- struct cpu_cacheinfo *ci;
unsigned int size = 0;
- int num_b, i;
+ struct cacheinfo *ci;
+ int num_b;
+
+ if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE))
+ return size;
num_b = bitmap_weight(&cbm, r->cache.cbm_len);
- ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
- for (i = 0; i < ci->num_leaves; i++) {
- if (ci->info_list[i].level == r->cache_level) {
- size = ci->info_list[i].size / r->cache.cbm_len * num_b;
- break;
- }
- }
+ ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope);
+ if (ci)
+ size = ci->size / r->cache.cbm_len * num_b;
return size;
}
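The rewritten rdtgroup_cbm_to_size() scales the cache size by the fraction of capacity-bitmask bits that are set. A worked example with hypothetical numbers (not taken from any specific CPU):

#include <stdio.h>

int main(void)
{
	unsigned int cache_size = 32 * 1024 * 1024;	/* hypothetical ci->size: 32 MB L3 */
	unsigned int cbm_len = 16;			/* hypothetical r->cache.cbm_len */
	unsigned long cbm = 0x000f;			/* CBM with 4 of 16 bits set */
	int num_b = __builtin_popcountl(cbm);		/* stands in for bitmap_weight() */

	/* Same expression as rdtgroup_cbm_to_size(): 32 MB / 16 * 4 = 8 MB */
	unsigned int size = cache_size / cbm_len * num_b;

	printf("allocatable size: %u bytes\n", size);
	return 0;
}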
@@ -1477,9 +1475,9 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
{
struct resctrl_schema *schema;
enum resctrl_conf_type type;
+ struct rdt_ctrl_domain *d;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
- struct rdt_domain *d;
unsigned int size;
int ret = 0;
u32 closid;
@@ -1503,7 +1501,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
rdtgrp->plr->d,
rdtgrp->plr->cbm);
- seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
+ seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
}
goto out;
}
@@ -1515,7 +1513,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
type = schema->conf_type;
sep = false;
seq_printf(s, "%*s:", max_name_width, schema->name);
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
if (sep)
seq_putc(s, ';');
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
@@ -1533,7 +1531,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
else
size = rdtgroup_cbm_to_size(r, d, ctrl);
}
- seq_printf(s, "%d=%u", d->id, size);
+ seq_printf(s, "%d=%u", d->hdr.id, size);
sep = true;
}
seq_putc(s, '\n');
@@ -1591,21 +1589,21 @@ static void mon_event_config_read(void *info)
mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
}
-static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
+static void mondata_config_read(struct rdt_mon_domain *d, struct mon_config_info *mon_info)
{
- smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1);
+ smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_read, mon_info, 1);
}
static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
{
struct mon_config_info mon_info = {0};
- struct rdt_domain *dom;
+ struct rdt_mon_domain *dom;
bool sep = false;
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->mon_domains, hdr.list) {
if (sep)
seq_puts(s, ";");
@@ -1613,7 +1611,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
mon_info.evtid = evtid;
mondata_config_read(dom, &mon_info);
- seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config);
+ seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
sep = true;
}
seq_puts(s, "\n");
@@ -1658,7 +1656,7 @@ static void mon_event_config_write(void *info)
}
static void mbm_config_write_domain(struct rdt_resource *r,
- struct rdt_domain *d, u32 evtid, u32 val)
+ struct rdt_mon_domain *d, u32 evtid, u32 val)
{
struct mon_config_info mon_info = {0};
@@ -1679,7 +1677,7 @@ static void mbm_config_write_domain(struct rdt_resource *r,
* are scoped at the domain level. Writing any of these MSRs
* on one CPU is observed by all the CPUs in the domain.
*/
- smp_call_function_any(&d->cpu_mask, mon_event_config_write,
+ smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_write,
&mon_info, 1);
/*
@@ -1699,7 +1697,7 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
char *dom_str = NULL, *id_str;
unsigned long dom_id, val;
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
@@ -1729,8 +1727,8 @@ next:
return -EINVAL;
}
- list_for_each_entry(d, &r->domains, list) {
- if (d->id == dom_id) {
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ if (d->hdr.id == dom_id) {
mbm_config_write_domain(r, d, evtid, val);
goto next;
}
@@ -2258,9 +2256,9 @@ static inline bool is_mba_linear(void)
static int set_cache_qos_cfg(int level, bool enable)
{
void (*update)(void *arg);
+ struct rdt_ctrl_domain *d;
struct rdt_resource *r_l;
cpumask_var_t cpu_mask;
- struct rdt_domain *d;
int cpu;
/* Walking r->domains, ensure it can't race with cpuhp */
@@ -2277,14 +2275,14 @@ static int set_cache_qos_cfg(int level, bool enable)
return -ENOMEM;
r_l = &rdt_resources_all[level].r_resctrl;
- list_for_each_entry(d, &r_l->domains, list) {
+ list_for_each_entry(d, &r_l->ctrl_domains, hdr.list) {
if (r_l->cache.arch_has_per_cpu_cfg)
/* Pick all the CPUs in the domain instance */
- for_each_cpu(cpu, &d->cpu_mask)
+ for_each_cpu(cpu, &d->hdr.cpu_mask)
cpumask_set_cpu(cpu, cpu_mask);
else
/* Pick one CPU from each domain instance to update MSR */
- cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
+ cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask);
}
/* Update QOS_CFG MSR on all the CPUs in cpu_mask */
@@ -2310,10 +2308,10 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
l3_qos_cfg_update(&hw_res->cdp_enabled);
}
-static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
+static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
u32 num_closid = resctrl_arch_get_num_closid(r);
- int cpu = cpumask_any(&d->cpu_mask);
+ int cpu = cpumask_any(&d->hdr.cpu_mask);
int i;
d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
@@ -2328,7 +2326,7 @@ static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
}
static void mba_sc_domain_destroy(struct rdt_resource *r,
- struct rdt_domain *d)
+ struct rdt_ctrl_domain *d)
{
kfree(d->mbps_val);
d->mbps_val = NULL;
@@ -2336,14 +2334,18 @@ static void mba_sc_domain_destroy(struct rdt_resource *r,
/*
* MBA software controller is supported only if
- * MBM is supported and MBA is in linear scale.
+ * MBM is supported and MBA is in linear scale,
+ * and the MBM monitor scope is the same as the
+ * MBA control scope.
*/
static bool supports_mba_mbps(void)
{
+ struct rdt_resource *rmbm = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
return (is_mbm_local_enabled() &&
- r->alloc_capable && is_mba_linear());
+ r->alloc_capable && is_mba_linear() &&
+ r->ctrl_scope == rmbm->mon_scope);
}
/*
@@ -2354,7 +2356,7 @@ static int set_mba_sc(bool mba_sc)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
u32 num_closid = resctrl_arch_get_num_closid(r);
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
int i;
if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
@@ -2362,7 +2364,7 @@ static int set_mba_sc(bool mba_sc)
r->membw.mba_sc = mba_sc;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
for (i = 0; i < num_closid; i++)
d->mbps_val[i] = MBA_MAX_MBPS;
}
@@ -2626,7 +2628,7 @@ static int rdt_get_tree(struct fs_context *fc)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
unsigned long flags = RFTYPE_CTRL_BASE;
- struct rdt_domain *dom;
+ struct rdt_mon_domain *dom;
struct rdt_resource *r;
int ret;
@@ -2701,7 +2703,7 @@ static int rdt_get_tree(struct fs_context *fc)
if (is_mbm_enabled()) {
r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
- list_for_each_entry(dom, &r->domains, list)
+ list_for_each_entry(dom, &r->mon_domains, hdr.list)
mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
RESCTRL_PICK_ANY_CPU);
}
@@ -2751,6 +2753,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
struct fs_parse_result result;
+ const char *msg;
int opt;
opt = fs_parse(fc, rdt_fs_parameters, param, &result);
@@ -2765,8 +2768,9 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx->enable_cdpl2 = true;
return 0;
case Opt_mba_mbps:
+ msg = "mba_MBps requires local MBM and linear scale MBA at L3 scope";
if (!supports_mba_mbps())
- return -EINVAL;
+ return invalfc(fc, msg);
ctx->enable_mba_mbps = true;
return 0;
case Opt_debug:
@@ -2811,9 +2815,9 @@ static int rdt_init_fs_context(struct fs_context *fc)
static int reset_all_ctrls(struct rdt_resource *r)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- struct rdt_hw_domain *hw_dom;
+ struct rdt_hw_ctrl_domain *hw_dom;
struct msr_param msr_param;
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
int i;
/* Walking r->domains, ensure it can't race with cpuhp */
@@ -2825,16 +2829,16 @@ static int reset_all_ctrls(struct rdt_resource *r)
/*
* Disable resource control for this resource by setting all
- * CBMs in all domains to the maximum mask value. Pick one CPU
+ * CBMs in all ctrl_domains to the maximum mask value. Pick one CPU
* from each domain to update the MSRs below.
*/
- list_for_each_entry(d, &r->domains, list) {
- hw_dom = resctrl_to_arch_dom(d);
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ hw_dom = resctrl_to_arch_ctrl_dom(d);
for (i = 0; i < hw_res->num_closid; i++)
hw_dom->ctrl_val[i] = r->default_ctrl;
msr_param.dom = d;
- smp_call_function_any(&d->cpu_mask, rdt_ctrl_update, &msr_param, 1);
+ smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
}
return 0;
@@ -3002,62 +3006,126 @@ static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
return ret;
}
+static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname)
+{
+ struct kernfs_node *kn;
+
+ kn = kernfs_find_and_get(pkn, name);
+ if (!kn)
+ return;
+ kernfs_put(kn);
+
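+ /*
+ * The parent mon_<res>_<id> directory only has mon_sub_* children when
+ * SNC is enabled. If at most one child is left (none in the non-SNC
+ * case, or only this domain's mon_sub_* directory), remove the whole
+ * parent; otherwise remove just this domain's sub-directory.
+ */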
+ if (kn->dir.subdirs <= 1)
+ kernfs_remove(kn);
+ else
+ kernfs_remove_by_name(kn, subname);
+}
+
/*
* Remove all subdirectories of mon_data of ctrl_mon groups
- * and monitor groups with given domain id.
+ * and monitor groups for the given domain.
+ * Remove files and directories containing "sum" of domain data
+ * when the last domain being summed is removed.
*/
static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
- unsigned int dom_id)
+ struct rdt_mon_domain *d)
{
struct rdtgroup *prgrp, *crgrp;
+ char subname[32];
+ bool snc_mode;
char name[32];
+ snc_mode = r->mon_scope == RESCTRL_L3_NODE;
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ if (snc_mode)
+ sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
+
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
- sprintf(name, "mon_%s_%02d", r->name, dom_id);
- kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
+ mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname);
list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
- kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
+ mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname);
}
}
-static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
- struct rdt_domain *d,
- struct rdt_resource *r, struct rdtgroup *prgrp)
+static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
+ struct rdt_resource *r, struct rdtgroup *prgrp,
+ bool do_sum)
{
+ struct rmid_read rr = {0};
union mon_data_bits priv;
- struct kernfs_node *kn;
struct mon_evt *mevt;
- struct rmid_read rr;
- char name[32];
int ret;
- sprintf(name, "mon_%s_%02d", r->name, d->id);
- /* create the directory */
- kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
- if (IS_ERR(kn))
- return PTR_ERR(kn);
-
- ret = rdtgroup_kn_set_ugid(kn);
- if (ret)
- goto out_destroy;
-
- if (WARN_ON(list_empty(&r->evt_list))) {
- ret = -EPERM;
- goto out_destroy;
- }
+ if (WARN_ON(list_empty(&r->evt_list)))
+ return -EPERM;
priv.u.rid = r->rid;
- priv.u.domid = d->id;
+ priv.u.domid = do_sum ? d->ci->id : d->hdr.id;
+ priv.u.sum = do_sum;
list_for_each_entry(mevt, &r->evt_list, list) {
priv.u.evtid = mevt->evtid;
ret = mon_addfile(kn, mevt->name, priv.priv);
if (ret)
+ return ret;
+
+ if (!do_sum && is_mbm_event(mevt->evtid))
+ mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
+ }
+
+ return 0;
+}
+
+static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
+ struct rdt_mon_domain *d,
+ struct rdt_resource *r, struct rdtgroup *prgrp)
+{
+ struct kernfs_node *kn, *ckn;
+ char name[32];
+ bool snc_mode;
+ int ret = 0;
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ snc_mode = r->mon_scope == RESCTRL_L3_NODE;
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ kn = kernfs_find_and_get(parent_kn, name);
+ if (kn) {
+ /*
+ * rdtgroup_mutex will prevent this directory from being
+ * removed. No need to keep the extra reference.
+ */
+ kernfs_put(kn);
+ } else {
+ kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
+ if (IS_ERR(kn))
+ return PTR_ERR(kn);
+
+ ret = rdtgroup_kn_set_ugid(kn);
+ if (ret)
+ goto out_destroy;
+ ret = mon_add_all_files(kn, d, r, prgrp, snc_mode);
+ if (ret)
+ goto out_destroy;
+ }
+
+ if (snc_mode) {
+ sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id);
+ ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp);
+ if (IS_ERR(ckn)) {
+ ret = -EINVAL;
+ goto out_destroy;
+ }
+
+ ret = rdtgroup_kn_set_ugid(ckn);
+ if (ret)
goto out_destroy;
- if (is_mbm_event(mevt->evtid))
- mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
+ ret = mon_add_all_files(ckn, d, r, prgrp, false);
+ if (ret)
+ goto out_destroy;
}
+
kernfs_activate(kn);
return 0;
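Taken together, mkdir_mondata_subdir() and mon_add_all_files() give each ctrl_mon and monitor group a two-level mon_data hierarchy when SNC is active. An illustrative layout for a hypothetical SNC2 system where L3 cache id 0 spans NUMA nodes 0 and 1 (directory names follow the sprintf() formats above, file names are the usual monitor events):

	mon_data/
	    mon_L3_00/              files here report sums across both SNC nodes
	        llc_occupancy
	        mbm_total_bytes
	        mbm_local_bytes
	        mon_sub_L3_00/      per-node files for NUMA node 0
	        mon_sub_L3_01/      per-node files for NUMA node 1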
@@ -3071,7 +3139,7 @@ out_destroy:
* and "monitor" groups with given domain id.
*/
static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
- struct rdt_domain *d)
+ struct rdt_mon_domain *d)
{
struct kernfs_node *parent_kn;
struct rdtgroup *prgrp, *crgrp;
@@ -3093,13 +3161,13 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
struct rdt_resource *r,
struct rdtgroup *prgrp)
{
- struct rdt_domain *dom;
+ struct rdt_mon_domain *dom;
int ret;
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->mon_domains, hdr.list) {
ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
if (ret)
return ret;
@@ -3198,7 +3266,7 @@ static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
* Set the RDT domain up to start off with all usable allocations. That is,
* all shareable and unused bits. All-zero CBM is invalid.
*/
-static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
+static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s,
u32 closid)
{
enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
@@ -3258,7 +3326,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
*/
tmp_cbm = cfg->new_ctrl;
if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
- rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id);
+ rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id);
return -ENOSPC;
}
cfg->have_new_ctrl = true;
@@ -3278,10 +3346,10 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
*/
static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
{
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
int ret;
- list_for_each_entry(d, &s->res->domains, list) {
+ list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) {
ret = __init_one_rdt_domain(d, s, closid);
if (ret < 0)
return ret;
@@ -3294,9 +3362,9 @@ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
{
struct resctrl_staged_config *cfg;
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
if (is_mba_sc(r)) {
d->mbps_val[closid] = MBA_MAX_MBPS;
continue;
@@ -3920,29 +3988,33 @@ static void __init rdtgroup_setup_default(void)
mutex_unlock(&rdtgroup_mutex);
}
-static void domain_destroy_mon_state(struct rdt_domain *d)
+static void domain_destroy_mon_state(struct rdt_mon_domain *d)
{
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
}
-void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
+void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
mutex_lock(&rdtgroup_mutex);
if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
mba_sc_domain_destroy(r, d);
- if (!r->mon_capable)
- goto out_unlock;
+ mutex_unlock(&rdtgroup_mutex);
+}
+
+void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ mutex_lock(&rdtgroup_mutex);
/*
* If resctrl is mounted, remove all the
* per domain monitor data directories.
*/
if (resctrl_mounted && resctrl_arch_mon_capable())
- rmdir_mondata_subdir_allrdtgrp(r, d->id);
+ rmdir_mondata_subdir_allrdtgrp(r, d);
if (is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
@@ -3961,11 +4033,10 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
domain_destroy_mon_state(d);
-out_unlock:
mutex_unlock(&rdtgroup_mutex);
}
-static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
+static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
{
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
size_t tsize;
@@ -3996,7 +4067,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
return 0;
}
-int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
+int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
int err = 0;
@@ -4005,11 +4076,18 @@ int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
/* RDT_RESOURCE_MBA is never mon_capable */
err = mba_sc_domain_allocate(r, d);
- goto out_unlock;
}
- if (!r->mon_capable)
- goto out_unlock;
+ mutex_unlock(&rdtgroup_mutex);
+
+ return err;
+}
+
+int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ int err;
+
+ mutex_lock(&rdtgroup_mutex);
err = domain_setup_mon_state(r, d);
if (err)
@@ -4060,8 +4138,8 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
void resctrl_offline_cpu(unsigned int cpu)
{
struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ struct rdt_mon_domain *d;
struct rdtgroup *rdtgrp;
- struct rdt_domain *d;
mutex_lock(&rdtgroup_mutex);
list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
@@ -4074,7 +4152,7 @@ void resctrl_offline_cpu(unsigned int cpu)
if (!l3->mon_capable)
goto out_unlock;
- d = get_domain_from_cpu(cpu, l3);
+ d = get_mon_domain_from_cpu(cpu, l3);
if (d) {
if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
cancel_delayed_work(&d->mbm_over);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index af5aa2c754c2..c84c30188fdf 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -45,6 +45,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
+ { X86_FEATURE_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 },
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
{ X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c
index d419deed6a48..7d476fa697ca 100644
--- a/arch/x86/kernel/cpu/topology_amd.c
+++ b/arch/x86/kernel/cpu/topology_amd.c
@@ -84,9 +84,9 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_topoext)
/*
* If leaf 0xb is available, then the domain shifts are set
- * already and nothing to do here.
+ * already and nothing to do here. Only valid for family >= 0x17.
*/
- if (!has_topoext) {
+ if (!has_topoext && tscan->c->x86 >= 0x17) {
/*
* Leaf 0x80000008 set the CORE domain shift already.
* Update the SMT domain, but do not propagate it.
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 11f83d07925e..00189cdeb775 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -41,80 +41,97 @@
#define CPUID_VMWARE_INFO_LEAF 0x40000000
#define CPUID_VMWARE_FEATURES_LEAF 0x40000010
-#define CPUID_VMWARE_FEATURES_ECX_VMMCALL BIT(0)
-#define CPUID_VMWARE_FEATURES_ECX_VMCALL BIT(1)
-#define VMWARE_HYPERVISOR_MAGIC 0x564D5868
-
-#define VMWARE_CMD_GETVERSION 10
-#define VMWARE_CMD_GETHZ 45
-#define VMWARE_CMD_GETVCPU_INFO 68
-#define VMWARE_CMD_LEGACY_X2APIC 3
-#define VMWARE_CMD_VCPU_RESERVED 31
-#define VMWARE_CMD_STEALCLOCK 91
+#define GETVCPU_INFO_LEGACY_X2APIC BIT(3)
+#define GETVCPU_INFO_VCPU_RESERVED BIT(31)
#define STEALCLOCK_NOT_AVAILABLE (-1)
#define STEALCLOCK_DISABLED 0
#define STEALCLOCK_ENABLED 1
-#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
- __asm__("inl (%%dx), %%eax" : \
- "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \
- "a"(VMWARE_HYPERVISOR_MAGIC), \
- "c"(VMWARE_CMD_##cmd), \
- "d"(VMWARE_HYPERVISOR_PORT), "b"(UINT_MAX) : \
- "memory")
-
-#define VMWARE_VMCALL(cmd, eax, ebx, ecx, edx) \
- __asm__("vmcall" : \
- "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \
- "a"(VMWARE_HYPERVISOR_MAGIC), \
- "c"(VMWARE_CMD_##cmd), \
- "d"(0), "b"(UINT_MAX) : \
- "memory")
-
-#define VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx) \
- __asm__("vmmcall" : \
- "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \
- "a"(VMWARE_HYPERVISOR_MAGIC), \
- "c"(VMWARE_CMD_##cmd), \
- "d"(0), "b"(UINT_MAX) : \
- "memory")
-
-#define VMWARE_CMD(cmd, eax, ebx, ecx, edx) do { \
- switch (vmware_hypercall_mode) { \
- case CPUID_VMWARE_FEATURES_ECX_VMCALL: \
- VMWARE_VMCALL(cmd, eax, ebx, ecx, edx); \
- break; \
- case CPUID_VMWARE_FEATURES_ECX_VMMCALL: \
- VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx); \
- break; \
- default: \
- VMWARE_PORT(cmd, eax, ebx, ecx, edx); \
- break; \
- } \
- } while (0)
-
struct vmware_steal_time {
union {
- uint64_t clock; /* stolen time counter in units of vtsc */
+ u64 clock; /* stolen time counter in units of vtsc */
struct {
/* only for little-endian */
- uint32_t clock_low;
- uint32_t clock_high;
+ u32 clock_low;
+ u32 clock_high;
};
};
- uint64_t reserved[7];
+ u64 reserved[7];
};
static unsigned long vmware_tsc_khz __ro_after_init;
static u8 vmware_hypercall_mode __ro_after_init;
+unsigned long vmware_hypercall_slow(unsigned long cmd,
+ unsigned long in1, unsigned long in3,
+ unsigned long in4, unsigned long in5,
+ u32 *out1, u32 *out2, u32 *out3,
+ u32 *out4, u32 *out5)
+{
+ unsigned long out0, rbx, rcx, rdx, rsi, rdi;
+
+ switch (vmware_hypercall_mode) {
+ case CPUID_VMWARE_FEATURES_ECX_VMCALL:
+ asm_inline volatile ("vmcall"
+ : "=a" (out0), "=b" (rbx), "=c" (rcx),
+ "=d" (rdx), "=S" (rsi), "=D" (rdi)
+ : "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3),
+ "S" (in4),
+ "D" (in5)
+ : "cc", "memory");
+ break;
+ case CPUID_VMWARE_FEATURES_ECX_VMMCALL:
+ asm_inline volatile ("vmmcall"
+ : "=a" (out0), "=b" (rbx), "=c" (rcx),
+ "=d" (rdx), "=S" (rsi), "=D" (rdi)
+ : "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3),
+ "S" (in4),
+ "D" (in5)
+ : "cc", "memory");
+ break;
+ default:
+ asm_inline volatile ("movw %[port], %%dx; inl (%%dx), %%eax"
+ : "=a" (out0), "=b" (rbx), "=c" (rcx),
+ "=d" (rdx), "=S" (rsi), "=D" (rdi)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3),
+ "S" (in4),
+ "D" (in5)
+ : "cc", "memory");
+ break;
+ }
+
+ if (out1)
+ *out1 = rbx;
+ if (out2)
+ *out2 = rcx;
+ if (out3)
+ *out3 = rdx;
+ if (out4)
+ *out4 = rsi;
+ if (out5)
+ *out5 = rdi;
+
+ return out0;
+}
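The callers converted below use small vmware_hypercallN() wrappers that come from the matching asm/vmware.h update, which is not part of this file's diff. As a hedged sketch of the calling convention only (the real wrappers use inline asm with alternatives rather than always taking the slow path), a three-argument variant might look like:

static inline unsigned long vmware_hypercall3_sketch(unsigned long cmd,
						     unsigned long in1,
						     u32 *out1, u32 *out2)
{
	/* Result comes back in %eax; %ebx and %ecx are returned via out1/out2. */
	return vmware_hypercall_slow(cmd, in1, 0, 0, 0,
				     out1, out2, NULL, NULL, NULL);
}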
+
static inline int __vmware_platform(void)
{
- uint32_t eax, ebx, ecx, edx;
- VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx);
- return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
+ u32 eax, ebx, ecx;
+
+ eax = vmware_hypercall3(VMWARE_CMD_GETVERSION, 0, &ebx, &ecx);
+ return eax != UINT_MAX && ebx == VMWARE_HYPERVISOR_MAGIC;
}
static unsigned long vmware_get_tsc_khz(void)
@@ -166,21 +183,12 @@ static void __init vmware_cyc2ns_setup(void)
pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset);
}
-static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
+static int vmware_cmd_stealclock(u32 addr_hi, u32 addr_lo)
{
- uint32_t result, info;
-
- asm volatile (VMWARE_HYPERCALL :
- "=a"(result),
- "=c"(info) :
- "a"(VMWARE_HYPERVISOR_MAGIC),
- "b"(0),
- "c"(VMWARE_CMD_STEALCLOCK),
- "d"(0),
- "S"(arg1),
- "D"(arg2) :
- "memory");
- return result;
+ u32 info;
+
+ return vmware_hypercall5(VMWARE_CMD_STEALCLOCK, 0, 0, addr_hi, addr_lo,
+ &info);
}
static bool stealclock_enable(phys_addr_t pa)
@@ -215,15 +223,15 @@ static bool vmware_is_stealclock_available(void)
* Return:
* The steal clock reading in ns.
*/
-static uint64_t vmware_steal_clock(int cpu)
+static u64 vmware_steal_clock(int cpu)
{
struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu);
- uint64_t clock;
+ u64 clock;
if (IS_ENABLED(CONFIG_64BIT))
clock = READ_ONCE(steal->clock);
else {
- uint32_t initial_high, low, high;
+ u32 initial_high, low, high;
do {
initial_high = READ_ONCE(steal->clock_high);
@@ -235,7 +243,7 @@ static uint64_t vmware_steal_clock(int cpu)
high = READ_ONCE(steal->clock_high);
} while (initial_high != high);
- clock = ((uint64_t)high << 32) | low;
+ clock = ((u64)high << 32) | low;
}
return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul,
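On 32-bit guests the steal clock cannot be read atomically, so the loop above samples clock_high, then clock_low, then clock_high again and retries if the upper half changed in between. A standalone sketch of the same pattern (illustrative, not the kernel's code; the kernel additionally wraps each access in READ_ONCE()):

#include <stdint.h>

struct split_counter {
	volatile uint32_t low;
	volatile uint32_t high;
};

static uint64_t read_split_counter(const struct split_counter *c)
{
	uint32_t initial_high, low, high;

	do {
		initial_high = c->high;		/* sample the upper half first */
		low = c->low;
		high = c->high;			/* re-read; a change means low wrapped */
	} while (initial_high != high);		/* retry until a consistent pair is seen */

	return ((uint64_t)high << 32) | low;
}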
@@ -389,13 +397,13 @@ static void __init vmware_set_capabilities(void)
static void __init vmware_platform_setup(void)
{
- uint32_t eax, ebx, ecx, edx;
- uint64_t lpj, tsc_khz;
+ u32 eax, ebx, ecx;
+ u64 lpj, tsc_khz;
- VMWARE_CMD(GETHZ, eax, ebx, ecx, edx);
+ eax = vmware_hypercall3(VMWARE_CMD_GETHZ, UINT_MAX, &ebx, &ecx);
if (ebx != UINT_MAX) {
- lpj = tsc_khz = eax | (((uint64_t)ebx) << 32);
+ lpj = tsc_khz = eax | (((u64)ebx) << 32);
do_div(tsc_khz, 1000);
WARN_ON(tsc_khz >> 32);
pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
@@ -446,7 +454,7 @@ static u8 __init vmware_select_hypercall(void)
* If !boot_cpu_has(X86_FEATURE_HYPERVISOR), vmware_hypercall_mode
* intentionally defaults to 0.
*/
-static uint32_t __init vmware_platform(void)
+static u32 __init vmware_platform(void)
{
if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
unsigned int eax;
@@ -474,12 +482,65 @@ static uint32_t __init vmware_platform(void)
/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
static bool __init vmware_legacy_x2apic_available(void)
{
- uint32_t eax, ebx, ecx, edx;
- VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx);
- return !(eax & BIT(VMWARE_CMD_VCPU_RESERVED)) &&
- (eax & BIT(VMWARE_CMD_LEGACY_X2APIC));
+ u32 eax;
+
+ eax = vmware_hypercall1(VMWARE_CMD_GETVCPU_INFO, 0);
+ return !(eax & GETVCPU_INFO_VCPU_RESERVED) &&
+ (eax & GETVCPU_INFO_LEGACY_X2APIC);
}
+#ifdef CONFIG_INTEL_TDX_GUEST
+/*
+ * TDCALL[TDG.VP.VMCALL] uses %rax (arg0) and %rcx (arg2). Therefore,
+ * we remap those registers to %r12 and %r13, respectively.
+ */
+unsigned long vmware_tdx_hypercall(unsigned long cmd,
+ unsigned long in1, unsigned long in3,
+ unsigned long in4, unsigned long in5,
+ u32 *out1, u32 *out2, u32 *out3,
+ u32 *out4, u32 *out5)
+{
+ struct tdx_module_args args = {};
+
+ if (!hypervisor_is_type(X86_HYPER_VMWARE)) {
+ pr_warn_once("Incorrect usage\n");
+ return ULONG_MAX;
+ }
+
+ if (cmd & ~VMWARE_CMD_MASK) {
+ pr_warn_once("Out of range command %lx\n", cmd);
+ return ULONG_MAX;
+ }
+
+ args.rbx = in1;
+ args.rdx = in3;
+ args.rsi = in4;
+ args.rdi = in5;
+ args.r10 = VMWARE_TDX_VENDOR_LEAF;
+ args.r11 = VMWARE_TDX_HCALL_FUNC;
+ args.r12 = VMWARE_HYPERVISOR_MAGIC;
+ args.r13 = cmd;
+ /* CPL */
+ args.r15 = 0;
+
+ __tdx_hypercall(&args);
+
+ if (out1)
+ *out1 = args.rbx;
+ if (out2)
+ *out2 = args.r13;
+ if (out3)
+ *out3 = args.rdx;
+ if (out4)
+ *out4 = args.rsi;
+ if (out5)
+ *out5 = args.rdi;
+
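+ /* %r12 holds what %eax would for a non-TDX hypercall (see remapping above). */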
+ return args.r12;
+}
+EXPORT_SYMBOL_GPL(vmware_tdx_hypercall);
+#endif
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb,
struct pt_regs *regs)