[PATCH v5 1/4] smpboot: introduce SDTL_INIT() helper to tidy sched topology setup
K Prateek Nayak
kprateek.nayak at amd.com
Fri Jul 11 15:50:30 AEST 2025
On 7/10/2025 4:27 PM, Li Chen wrote:
> /*
> * .. and append 'j' levels of NUMA goodness.
> */
> for (j = 1; j < nr_levels; i++, j++) {
> - tl[i] = (struct sched_domain_topology_level){
> - .mask = sd_numa_mask,
> - .sd_flags = cpu_numa_flags,
> - .flags = SDTL_OVERLAP,
> - .numa_level = j,
> - SD_INIT_NAME(NUMA)
> - };
> + tl[i] = SDTL_INIT(sd_numa_mask, cpu_numa_flags, NUMA);
> + tl[i].numa_level = j;
> + tl[i].flags = SDTL_OVERLAP;
Tangential discussion: I was looking at this and was wondering why we
need a "tl->flags" when there is already sd_flags() function and we can
simply add SD_OVERLAP to sd_numa_flags().
I think "tl->flags" was needed when the idea of overlap domains was
added in commit e3589f6c81e4 ("sched: Allow for overlapping sched_domain
spans") when it depended on "FORCE_SD_OVERLAP" sched_feat() which
allowed toggling this off but that was done away with in commit
af85596c74de ("sched/topology: Remove FORCE_SD_OVERLAP") so perhaps we
can get rid of it now?
Relying on SD_NUMA should be enough currently. Peter, Valentin, what do
you think of something like below?
(Build and boot tested on top of this series on tip:sched/core)
diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
index b04a5d04dee9..42839cfa2778 100644
--- a/include/linux/sched/sd_flags.h
+++ b/include/linux/sched/sd_flags.h
@@ -153,14 +153,6 @@ SD_FLAG(SD_ASYM_PACKING, SDF_NEEDS_GROUPS)
*/
SD_FLAG(SD_PREFER_SIBLING, SDF_NEEDS_GROUPS)
-/*
- * sched_groups of this level overlap
- *
- * SHARED_PARENT: Set for all NUMA levels above NODE.
- * NEEDS_GROUPS: Overlaps can only exist with more than one group.
- */
-SD_FLAG(SD_OVERLAP, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
-
/*
* Cross-node balancing
*
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 0d5daaa277b7..5263746b63e8 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -175,8 +175,6 @@ bool cpus_share_resources(int this_cpu, int that_cpu);
typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
typedef int (*sched_domain_flags_f)(void);
-#define SDTL_OVERLAP 0x01
-
struct sd_data {
struct sched_domain *__percpu *sd;
struct sched_domain_shared *__percpu *sds;
@@ -187,7 +185,6 @@ struct sd_data {
struct sched_domain_topology_level {
sched_domain_mask_f mask;
sched_domain_flags_f sd_flags;
- int flags;
int numa_level;
struct sd_data data;
char *name;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 20a845697c1d..b9b4bbbf0af6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9926,9 +9926,9 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
min_capacity = ULONG_MAX;
max_capacity = 0;
- if (child->flags & SD_OVERLAP) {
+ if (child->flags & SD_NUMA) {
/*
- * SD_OVERLAP domains cannot assume that child groups
+ * SD_NUMA domains cannot assume that child groups
* span the current group.
*/
@@ -9941,7 +9941,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
}
} else {
/*
- * !SD_OVERLAP domains can assume that child groups
+ * !SD_NUMA domains can assume that child groups
* span the current group.
*/
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index d01f5a49f2e7..977e133bb8a4 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -89,7 +89,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
break;
}
- if (!(sd->flags & SD_OVERLAP) &&
+ if (!(sd->flags & SD_NUMA) &&
cpumask_intersects(groupmask, sched_group_span(group))) {
printk(KERN_CONT "\n");
printk(KERN_ERR "ERROR: repeated CPUs\n");
@@ -102,7 +102,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
group->sgc->id,
cpumask_pr_args(sched_group_span(group)));
- if ((sd->flags & SD_OVERLAP) &&
+ if ((sd->flags & SD_NUMA) &&
!cpumask_equal(group_balance_mask(group), sched_group_span(group))) {
printk(KERN_CONT " mask=%*pbl",
cpumask_pr_args(group_balance_mask(group)));
@@ -1344,7 +1344,7 @@ void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio)
* "sg->asym_prefer_cpu" to "sg->sgc->asym_prefer_cpu"
* which is shared by all the overlapping groups.
*/
- WARN_ON_ONCE(sd->flags & SD_OVERLAP);
+ WARN_ON_ONCE(sd->flags & SD_NUMA);
sg = sd->groups;
if (cpu != sg->asym_prefer_cpu) {
@@ -2016,7 +2016,6 @@ void sched_init_numa(int offline_node)
for (j = 1; j < nr_levels; i++, j++) {
tl[i] = SDTL_INIT(sd_numa_mask, cpu_numa_flags, NUMA);
tl[i].numa_level = j;
- tl[i].flags = SDTL_OVERLAP;
}
sched_domain_topology_saved = sched_domain_topology;
@@ -2327,7 +2326,7 @@ static void __sdt_free(const struct cpumask *cpu_map)
if (sdd->sd) {
sd = *per_cpu_ptr(sdd->sd, j);
- if (sd && (sd->flags & SD_OVERLAP))
+ if (sd && (sd->flags & SD_NUMA))
free_sched_groups(sd->groups, 0);
kfree(*per_cpu_ptr(sdd->sd, j));
}
@@ -2393,9 +2392,13 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
id_seen = sched_domains_tmpmask2;
for_each_sd_topology(tl) {
+ int tl_common_flags = 0;
+
+ if (tl->sd_flags)
+ tl_common_flags = (*tl->sd_flags)();
/* NUMA levels are allowed to overlap */
- if (tl->flags & SDTL_OVERLAP)
+ if (tl_common_flags & SD_NUMA)
continue;
cpumask_clear(covered);
@@ -2466,8 +2469,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
if (tl == sched_domain_topology)
*per_cpu_ptr(d.sd, i) = sd;
- if (tl->flags & SDTL_OVERLAP)
- sd->flags |= SD_OVERLAP;
if (cpumask_equal(cpu_map, sched_domain_span(sd)))
break;
}
@@ -2480,7 +2481,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
for_each_cpu(i, cpu_map) {
for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
sd->span_weight = cpumask_weight(sched_domain_span(sd));
- if (sd->flags & SD_OVERLAP) {
+ if (sd->flags & SD_NUMA) {
if (build_overlap_sched_groups(sd, i))
goto error;
} else {
---
We can also keep SD_OVERLAP and only remove SDTL_OVERLAP, tl->flags if
that is preferred or just have them both if you see a future !NUMA
usecases for overlapping domains.
> }
>
> sched_domain_topology_saved = sched_domain_topology;
--
Thanks and Regards,
Prateek
More information about the Linuxppc-dev
mailing list