summaryrefslogtreecommitdiff
path: root/arch/powerpc/mm/numa.c
diff options
context:
space:
mode:
authorAneesh Kumar K.V <aneesh.kumar@linux.ibm.com>2021-08-12 18:52:23 +0530
committerMichael Ellerman <mpe@ellerman.id.au>2021-08-13 22:04:27 +1000
commit1c6b5a7e74052768977855f95d6b8812f6e7772c (patch)
treedb1f7c5cb7d5460db4f53f73d231527e4fe64055 /arch/powerpc/mm/numa.c
parentef31cb83d19c4c589d650747cd5a7e502be9f665 (diff)
powerpc/pseries: Add support for FORM2 associativity
PAPR interface currently supports two different ways of communicating resource grouping details to the OS. These are referred to as Form 0 and Form 1 associativity grouping. Form 0 is the older format and is now considered deprecated. This patch adds another resource grouping named FORM2. Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210812132223.225214-6-aneesh.kumar@linux.ibm.com
Diffstat (limited to 'arch/powerpc/mm/numa.c')
-rw-r--r--arch/powerpc/mm/numa.c187
1 files changed, 152 insertions, 35 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index fe3a9a38eddf..fe9c6ced40b2 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -56,12 +56,17 @@ static int n_mem_addr_cells, n_mem_size_cells;
#define FORM0_AFFINITY 0
#define FORM1_AFFINITY 1
+#define FORM2_AFFINITY 2
static int affinity_form;
#define MAX_DISTANCE_REF_POINTS 4
static int distance_ref_points_depth;
static const __be32 *distance_ref_points;
static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
+static int numa_distance_table[MAX_NUMNODES][MAX_NUMNODES] = {
+ [0 ... MAX_NUMNODES - 1] = { [0 ... MAX_NUMNODES - 1] = -1 }
+};
+static int numa_id_index_table[MAX_NUMNODES] = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };
/*
* Allocate node_to_cpumask_map based on number of available nodes
@@ -166,6 +171,54 @@ static void unmap_cpu_from_node(unsigned long cpu)
}
#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
+static int __associativity_to_nid(const __be32 *associativity,
+ int max_array_sz)
+{
+ int nid;
+ /*
+ * primary_domain_index is 1 based array index.
+ */
+ int index = primary_domain_index - 1;
+
+ if (!numa_enabled || index >= max_array_sz)
+ return NUMA_NO_NODE;
+
+ nid = of_read_number(&associativity[index], 1);
+
+ /* POWER4 LPAR uses 0xffff as invalid node */
+ if (nid == 0xffff || nid >= nr_node_ids)
+ nid = NUMA_NO_NODE;
+ return nid;
+}
+/*
+ * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
+ * info is found.
+ */
+static int associativity_to_nid(const __be32 *associativity)
+{
+ int array_sz = of_read_number(associativity, 1);
+
+ /* Skip the first element in the associativity array */
+ return __associativity_to_nid((associativity + 1), array_sz);
+}
+
+static int __cpu_form2_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+ int dist;
+ int node1, node2;
+
+ node1 = associativity_to_nid(cpu1_assoc);
+ node2 = associativity_to_nid(cpu2_assoc);
+
+ dist = numa_distance_table[node1][node2];
+ if (dist <= LOCAL_DISTANCE)
+ return 0;
+ else if (dist <= REMOTE_DISTANCE)
+ return 1;
+ else
+ return 2;
+}
+
static int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
{
int dist = 0;
@@ -186,8 +239,9 @@ int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
{
/* We should not get called with FORM0 */
VM_WARN_ON(affinity_form == FORM0_AFFINITY);
-
- return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc);
+ if (affinity_form == FORM1_AFFINITY)
+ return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc);
+ return __cpu_form2_relative_distance(cpu1_assoc, cpu2_assoc);
}
/* must hold reference to node during call */
@@ -201,7 +255,9 @@ int __node_distance(int a, int b)
int i;
int distance = LOCAL_DISTANCE;
- if (affinity_form == FORM0_AFFINITY)
+ if (affinity_form == FORM2_AFFINITY)
+ return numa_distance_table[a][b];
+ else if (affinity_form == FORM0_AFFINITY)
return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);
for (i = 0; i < distance_ref_points_depth; i++) {
@@ -216,37 +272,6 @@ int __node_distance(int a, int b)
}
EXPORT_SYMBOL(__node_distance);
-static int __associativity_to_nid(const __be32 *associativity,
- int max_array_sz)
-{
- int nid;
- /*
- * primary_domain_index is 1 based array index.
- */
- int index = primary_domain_index - 1;
-
- if (!numa_enabled || index >= max_array_sz)
- return NUMA_NO_NODE;
-
- nid = of_read_number(&associativity[index], 1);
-
- /* POWER4 LPAR uses 0xffff as invalid node */
- if (nid == 0xffff || nid >= nr_node_ids)
- nid = NUMA_NO_NODE;
- return nid;
-}
-/*
- * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
- * info is found.
- */
-static int associativity_to_nid(const __be32 *associativity)
-{
- int array_sz = of_read_number(associativity, 1);
-
- /* Skip the first element in the associativity array */
- return __associativity_to_nid((associativity + 1), array_sz);
-}
-
/* Returns the nid associated with the given device tree node,
* or -1 if not found.
*/
@@ -320,6 +345,8 @@ static void initialize_form1_numa_distance(const __be32 *associativity)
*/
void update_numa_distance(struct device_node *node)
{
+ int nid;
+
if (affinity_form == FORM0_AFFINITY)
return;
else if (affinity_form == FORM1_AFFINITY) {
@@ -332,6 +359,84 @@ void update_numa_distance(struct device_node *node)
initialize_form1_numa_distance(associativity);
return;
}
+
+ /* FORM2 affinity */
+ nid = of_node_to_nid_single(node);
+ if (nid == NUMA_NO_NODE)
+ return;
+
+ /*
+ * With FORM2 we expect NUMA distance of all possible NUMA
+ * nodes to be provided during boot.
+ */
+ WARN(numa_distance_table[nid][nid] == -1,
+ "NUMA distance details for node %d not provided\n", nid);
+}
+
+/*
+ * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN}
+ * ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... N elements}
+ */
+static void initialize_form2_numa_distance_lookup_table(void)
+{
+ int i, j;
+ struct device_node *root;
+ const __u8 *numa_dist_table;
+ const __be32 *numa_lookup_index;
+ int numa_dist_table_length;
+ int max_numa_index, distance_index;
+
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ root = of_find_node_by_path("/ibm,opal");
+ else
+ root = of_find_node_by_path("/rtas");
+ if (!root)
+ root = of_find_node_by_path("/");
+
+ numa_lookup_index = of_get_property(root, "ibm,numa-lookup-index-table", NULL);
+ max_numa_index = of_read_number(&numa_lookup_index[0], 1);
+
+ /* first element of the array is the size and is encode-int */
+ numa_dist_table = of_get_property(root, "ibm,numa-distance-table", NULL);
+ numa_dist_table_length = of_read_number((const __be32 *)&numa_dist_table[0], 1);
+ /* Skip the size which is encoded int */
+ numa_dist_table += sizeof(__be32);
+
+ pr_debug("numa_dist_table_len = %d, numa_dist_indexes_len = %d\n",
+ numa_dist_table_length, max_numa_index);
+
+ for (i = 0; i < max_numa_index; i++)
+ /* +1 skip the max_numa_index in the property */
+ numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1);
+
+
+ if (numa_dist_table_length != max_numa_index * max_numa_index) {
+ WARN(1, "Wrong NUMA distance information\n");
+ /* consider everybody else just remote. */
+ for (i = 0; i < max_numa_index; i++) {
+ for (j = 0; j < max_numa_index; j++) {
+ int nodeA = numa_id_index_table[i];
+ int nodeB = numa_id_index_table[j];
+
+ if (nodeA == nodeB)
+ numa_distance_table[nodeA][nodeB] = LOCAL_DISTANCE;
+ else
+ numa_distance_table[nodeA][nodeB] = REMOTE_DISTANCE;
+ }
+ }
+ }
+
+ distance_index = 0;
+ for (i = 0; i < max_numa_index; i++) {
+ for (j = 0; j < max_numa_index; j++) {
+ int nodeA = numa_id_index_table[i];
+ int nodeB = numa_id_index_table[j];
+
+ numa_distance_table[nodeA][nodeB] = numa_dist_table[distance_index++];
+ pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, numa_distance_table[nodeA][nodeB]);
+ }
+ }
+ of_node_put(root);
}
static int __init find_primary_domain_index(void)
@@ -344,6 +449,9 @@ static int __init find_primary_domain_index(void)
*/
if (firmware_has_feature(FW_FEATURE_OPAL)) {
affinity_form = FORM1_AFFINITY;
+ } else if (firmware_has_feature(FW_FEATURE_FORM2_AFFINITY)) {
+ dbg("Using form 2 affinity\n");
+ affinity_form = FORM2_AFFINITY;
} else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) {
dbg("Using form 1 affinity\n");
affinity_form = FORM1_AFFINITY;
@@ -388,9 +496,12 @@ static int __init find_primary_domain_index(void)
index = of_read_number(&distance_ref_points[1], 1);
} else {
+ /*
+ * Both FORM1 and FORM2 affinity find the primary domain details
+ * at the same offset.
+ */
index = of_read_number(distance_ref_points, 1);
}
-
/*
* Warn and cap if the hardware supports more than
* MAX_DISTANCE_REF_POINTS domains.
@@ -820,6 +931,12 @@ static int __init parse_numa_properties(void)
dbg("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index);
/*
+ * If it is FORM2 initialize the distance table here.
+ */
+ if (affinity_form == FORM2_AFFINITY)
+ initialize_form2_numa_distance_lookup_table();
+
+ /*
* Even though we connect cpus to numa domains later in SMP
* init, we need to know the node ids now. This is because
* each node to be onlined must have NODE_DATA etc backing it.