
Update v6.2 patches

Changes:
 - Add patches for Intel Thread Director to improve performance on
   Alder Lake devices.

Links:
 - kernel: https://github.com/linux-surface/kernel/commit/cb08b65716c8c6bb1e5891323ada483cbfce6bb2
 - Upstream submission for Thread Director:
     https://lore.kernel.org/lkml/20230207051105.11575-1-ricardo.neri-calderon@linux.intel.com/
 - Patches required as prerequisites for the thread-director patchset:
     https://lore.kernel.org/lkml/20230207045838.11243-1-ricardo.neri-calderon@linux.intel.com/
Maximilian Luz · 2 years ago · commit ca2ddd30a1

1 changed file with 3268 additions and 0 deletions

patches/6.2/0014-intel-thread-director.patch (+3268, -0)

@@ -0,0 +1,3268 @@
+From bd2bba4036cb8c95f83e45cd4d8b22369fe6d0cb Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 20:58:29 -0800
+Subject: [PATCH] sched/fair: Generalize asym_packing logic for SMT cores
+
+When doing asym_packing load balancing between cores, all we care about is that
+the destination core is fully idle (including SMT siblings, if any) and
+that the busiest candidate scheduling group has exactly one busy CPU. It is
+irrelevant whether the candidate busiest core is non-SMT, SMT2, SMT4, SMT8,
+etc.
+
+Do not handle the candidate busiest non-SMT vs SMT cases separately. Simply
+do the two checks described above. Let find_busiest_group() handle bigger
+imbalances in the number of idle CPUs.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Reviewed-by: Len Brown <len.brown@intel.com>
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Tested-by: Zhang Rui <rui.zhang@intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/fair.c | 41 ++++++++++++++---------------------------
+ 1 file changed, 14 insertions(+), 27 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 0f8736991427..4509086a60a0 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -9124,13 +9124,11 @@ group_type group_classify(unsigned int imbalance_pct,
+  * the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
+  * only if @dst_cpu has higher priority.
+  *
+- * If both @dst_cpu and @sg have SMT siblings, and @sg has exactly one more
+- * busy CPU than @sds::local, let @dst_cpu pull tasks if it has higher priority.
+- * Bigger imbalances in the number of busy CPUs will be dealt with in
+- * update_sd_pick_busiest().
+- *
+- * If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings
+- * of @dst_cpu are idle and @sg has lower priority.
++ * If @dst_cpu has SMT siblings, check if there are no running tasks in
++ * @sds::local. In such case, decide based on the priority of @sg. Do it only
++ * if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
++ * imbalances in the number of busy CPUs will be dealt with in
++ * find_busiest_group().
+  *
+  * Return: true if @dst_cpu can pull tasks, false otherwise.
+  */
+@@ -9139,12 +9137,10 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
+ 				    struct sched_group *sg)
+ {
+ #ifdef CONFIG_SCHED_SMT
+-	bool local_is_smt, sg_is_smt;
++	bool local_is_smt;
+ 	int sg_busy_cpus;
+ 
+ 	local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
+-	sg_is_smt = sg->flags & SD_SHARE_CPUCAPACITY;
+-
+ 	sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
+ 
+ 	if (!local_is_smt) {
+@@ -9165,25 +9161,16 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
+ 		return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
+ 	}
+ 
+-	/* @dst_cpu has SMT siblings. */
+-
+-	if (sg_is_smt) {
+-		int local_busy_cpus = sds->local->group_weight -
+-				      sds->local_stat.idle_cpus;
+-		int busy_cpus_delta = sg_busy_cpus - local_busy_cpus;
+-
+-		if (busy_cpus_delta == 1)
+-			return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
+-
+-		return false;
+-	}
+-
+ 	/*
+-	 * @sg does not have SMT siblings. Ensure that @sds::local does not end
+-	 * up with more than one busy SMT sibling and only pull tasks if there
+-	 * are not busy CPUs (i.e., no CPU has running tasks).
++	 * @dst_cpu has SMT siblings. Do asym_packing load balancing only if
++	 * all its siblings are idle (moving tasks between physical cores in
++	 * which some SMT siblings are busy results in the same throughput).
++	 *
++	 * If the difference in the number of busy CPUs is two or more, let
++	 * find_busiest_group() take care of it. We only care if @sg has
++	 * exactly one busy CPU. This covers SMT and non-SMT sched groups.
+ 	 */
+-	if (!sds->local_stat.sum_nr_running)
++	if (sg_busy_cpus == 1 && !sds->local_stat.sum_nr_running)
+ 		return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
+ 
+ 	return false;
+-- 
+2.39.2
+
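A minimal standalone sketch of the pull condition described above (the struct
and helper names are hypothetical and do not exist in the kernel):

    #include <stdbool.h>

    /* Stand-in for the load-balancing statistics of the candidate group. */
    struct candidate_group {
            unsigned int weight;    /* number of CPUs in the group */
            unsigned int idle_cpus; /* how many of them are idle */
    };

    /*
     * Generalized asym_packing decision: the destination core must be fully
     * idle (all SMT siblings, if any) and the candidate group must have
     * exactly one busy CPU; then priorities decide. Bigger imbalances are
     * left to find_busiest_group().
     */
    static bool asym_can_pull(bool dst_core_fully_idle,
                              bool dst_has_higher_prio,
                              const struct candidate_group *sg)
    {
            unsigned int busy = sg->weight - sg->idle_cpus;

            if (!dst_core_fully_idle)
                    return false;

            if (busy == 1)
                    return dst_has_higher_prio;

            return false;
    }
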
+From ee23d606abde99fbab94fa15ce3ef701b430d8a7 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 20:58:30 -0800
+Subject: [PATCH] sched/fair: Move is_core_idle() out of CONFIG_NUMA
+
+asym_packing needs this function to determine whether an SMT core is a
+suitable destination for load balancing.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Tested-by: Zhang Rui <rui.zhang@intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/fair.c | 34 +++++++++++++++++-----------------
+ 1 file changed, 17 insertions(+), 17 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 4509086a60a0..d58df9c6a88c 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1064,6 +1064,23 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+  * Scheduling class queueing methods:
+  */
+ 
++static inline bool is_core_idle(int cpu)
++{
++#ifdef CONFIG_SCHED_SMT
++	int sibling;
++
++	for_each_cpu(sibling, cpu_smt_mask(cpu)) {
++		if (cpu == sibling)
++			continue;
++
++		if (!idle_cpu(sibling))
++			return false;
++	}
++#endif
++
++	return true;
++}
++
+ #ifdef CONFIG_NUMA
+ #define NUMA_IMBALANCE_MIN 2
+ 
+@@ -1700,23 +1717,6 @@ struct numa_stats {
+ 	int idle_cpu;
+ };
+ 
+-static inline bool is_core_idle(int cpu)
+-{
+-#ifdef CONFIG_SCHED_SMT
+-	int sibling;
+-
+-	for_each_cpu(sibling, cpu_smt_mask(cpu)) {
+-		if (cpu == sibling)
+-			continue;
+-
+-		if (!idle_cpu(sibling))
+-			return false;
+-	}
+-#endif
+-
+-	return true;
+-}
+-
+ struct task_numa_env {
+ 	struct task_struct *p;
+ 
+-- 
+2.39.2
+
+From 995477b05ed2c85c3b3b796118468c1c66edb37e Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 20:58:31 -0800
+Subject: [PATCH] sched/fair: Only do asym_packing load balancing from fully
+ idle SMT cores
+
+When balancing load between cores, all the SMT siblings of the destination
+CPU, if any, must be idle. Otherwise, pulling new tasks degrades the
+throughput of the busy SMT siblings. The overall throughput of the system
+remains the same.
+
+When balancing load within an SMT core this consideration is not
+relevant. Follow the priorities that hardware indicates.
+
+Using is_core_idle() renders checking !sds->local_stat.sum_nr_running
+redundant. Remove it.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Suggested-by: Valentin Schneider <vschneid@redhat.com>
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Tested-by: Zhang Rui <rui.zhang@intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/fair.c | 34 +++++++++++++++++++++++++---------
+ 1 file changed, 25 insertions(+), 9 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index d58df9c6a88c..1b134a2f0585 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -9120,12 +9120,14 @@ group_type group_classify(unsigned int imbalance_pct,
+  * Check the state of the SMT siblings of both @sds::local and @sg and decide
+  * if @dst_cpu can pull tasks.
+  *
++ * This function must be called only if all the SMT siblings of @dst_cpu are
++ * idle, if any.
++ *
+  * If @dst_cpu does not have SMT siblings, it can pull tasks if two or more of
+  * the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
+  * only if @dst_cpu has higher priority.
+  *
+- * If @dst_cpu has SMT siblings, check if there are no running tasks in
+- * @sds::local. In such case, decide based on the priority of @sg. Do it only
++ * If @dst_cpu has SMT siblings, decide based on the priority of @sg. Do it only
+  * if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
+  * imbalances in the number of busy CPUs will be dealt with in
+  * find_busiest_group().
+@@ -9162,15 +9164,13 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
+ 	}
+ 
+ 	/*
+-	 * @dst_cpu has SMT siblings. Do asym_packing load balancing only if
+-	 * all its siblings are idle (moving tasks between physical cores in
+-	 * which some SMT siblings are busy results in the same throughput).
++	 * @dst_cpu has SMT siblings and they are also idle.
+ 	 *
+ 	 * If the difference in the number of busy CPUs is two or more, let
+ 	 * find_busiest_group() take care of it. We only care if @sg has
+ 	 * exactly one busy CPU. This covers SMT and non-SMT sched groups.
+ 	 */
+-	if (sg_busy_cpus == 1 && !sds->local_stat.sum_nr_running)
++	if (sg_busy_cpus == 1)
+ 		return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
+ 
+ 	return false;
+@@ -9184,7 +9184,14 @@ static inline bool
+ sched_asym(struct lb_env *env, struct sd_lb_stats *sds,  struct sg_lb_stats *sgs,
+ 	   struct sched_group *group)
+ {
+-	/* Only do SMT checks if either local or candidate have SMT siblings */
++	/*
++	 * If the destination CPU has SMT siblings, env->idle != CPU_NOT_IDLE
++	 * is not sufficient. We need to make sure the whole core is idle.
++	 */
++	if (sds->local->flags & SD_SHARE_CPUCAPACITY && !is_core_idle(env->dst_cpu))
++		return false;
++
++	/* Only do SMT checks if either local or candidate have SMT siblings. */
+ 	if ((sds->local->flags & SD_SHARE_CPUCAPACITY) ||
+ 	    (group->flags & SD_SHARE_CPUCAPACITY))
+ 		return asym_smt_can_pull_tasks(env->dst_cpu, sds, sgs, group);
+@@ -11131,8 +11138,17 @@ static void nohz_balancer_kick(struct rq *rq)
+ 		 */
+ 		for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
+ 			if (sched_asym_prefer(i, cpu)) {
+-				flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
+-				goto unlock;
++				/*
++				 * Always do ASYM_PACKING balance in the SMT
++				 * domain. In upper domains, the core must be
++				 * fully idle.
++				 */
++				if (sd->flags & SD_SHARE_CPUCAPACITY ||
++				    (!(sd->flags & SD_SHARE_CPUCAPACITY) &&
++				     is_core_idle(i))) {
++					flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
++					goto unlock;
++				}
+ 			}
+ 		}
+ 	}
+-- 
+2.39.2
+
+From 9941162cdf50901818e53975e116f317cb38173d Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 20:58:32 -0800
+Subject: [PATCH] sched/fair: Let low-priority cores help high-priority busy
+ SMT cores
+
+Using asym_packing priorities within an SMT core is straightforward. Just
+follow the priorities that hardware indicates.
+
+When balancing load from an SMT core, also consider the idle state of its
+siblings. Priorities do not reflect that an SMT core divides its throughput
+among all its busy siblings. They only make sense when exactly one sibling
+is busy.
+
+Indicate that active balance is needed if the destination CPU has lower
+priority than the source CPU but the latter has busy SMT siblings.
+
+Make find_busiest_queue() not skip higher-priority SMT cores with more than
+one busy sibling.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Suggested-by: Valentin Schneider <vschneid@redhat.com>
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Tested-by: Zhang Rui <rui.zhang@intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/fair.c | 31 ++++++++++++++++++++++++++-----
+ 1 file changed, 26 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 1b134a2f0585..1255d99877fe 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -10306,11 +10306,20 @@ static struct rq *find_busiest_queue(struct lb_env *env,
+ 		    nr_running == 1)
+ 			continue;
+ 
+-		/* Make sure we only pull tasks from a CPU of lower priority */
++		/*
++		 * Make sure we only pull tasks from a CPU of lower priority
++		 * when balancing between SMT siblings.
++		 *
++		 * If balancing between cores, let lower priority CPUs help
++		 * SMT cores with more than one busy sibling.
++		 */
+ 		if ((env->sd->flags & SD_ASYM_PACKING) &&
+ 		    sched_asym_prefer(i, env->dst_cpu) &&
+-		    nr_running == 1)
+-			continue;
++		    nr_running == 1) {
++			if (env->sd->flags & SD_SHARE_CPUCAPACITY ||
++			    (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && is_core_idle(i)))
++				continue;
++		}
+ 
+ 		switch (env->migration_type) {
+ 		case migrate_load:
+@@ -10400,8 +10409,20 @@ asym_active_balance(struct lb_env *env)
+ 	 * lower priority CPUs in order to pack all tasks in the
+ 	 * highest priority CPUs.
+ 	 */
+-	return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
+-	       sched_asym_prefer(env->dst_cpu, env->src_cpu);
++	if (env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING)) {
++		/* Always obey priorities between SMT siblings. */
++		if (env->sd->flags & SD_SHARE_CPUCAPACITY)
++			return sched_asym_prefer(env->dst_cpu, env->src_cpu);
++
++		/*
++		 * A lower priority CPU can help an SMT core with more than one
++		 * busy sibling.
++		 */
++		return sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
++		       !is_core_idle(env->src_cpu);
++	}
++
++	return false;
+ }
+ 
+ static inline bool
+-- 
+2.39.2
+
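A condensed restatement of the asym_active_balance() condition introduced
above, as a sketch with boolean parameters standing in for the kernel's
checks (illustrative only, not the patch's code):

    #include <stdbool.h>

    static bool asym_active_balance_sketch(bool dst_idle, bool asym_packing,
                                           bool smt_domain,
                                           bool dst_prefers_over_src,
                                           bool src_core_fully_idle)
    {
            if (!dst_idle || !asym_packing)
                    return false;

            /* Between SMT siblings, priorities alone decide. */
            if (smt_domain)
                    return dst_prefers_over_src;

            /*
             * Between cores, a lower-priority destination may still help a
             * source core whose SMT siblings are busy.
             */
            return dst_prefers_over_src || !src_core_fully_idle;
    }
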
+From b6fe3b340efe48625bcd5d6f9080a77e39be6a3f Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 20:58:33 -0800
+Subject: [PATCH] sched/fair: Keep a fully_busy SMT sched group as busiest
+
+When comparing two fully_busy scheduling groups, keep the current busiest
+group if it represents an SMT core. Tasks in such scheduling group share
+CPU resources and need more help than tasks in a non-SMT fully_busy group.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Tested-by: Zhang Rui <rui.zhang@intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/fair.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 1255d99877fe..ed1f13fa32f8 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -9384,10 +9384,22 @@ static bool update_sd_pick_busiest(struct lb_env *env,
+ 		 * contention when accessing shared HW resources.
+ 		 *
+ 		 * XXX for now avg_load is not computed and always 0 so we
+-		 * select the 1st one.
++		 * select the 1st one, except if @sg is composed of SMT
++		 * siblings.
+ 		 */
+-		if (sgs->avg_load <= busiest->avg_load)
++
++		if (sgs->avg_load < busiest->avg_load)
+ 			return false;
++
++		if (sgs->avg_load == busiest->avg_load) {
++			/*
++			 * SMT sched groups need more help than non-SMT groups.
++			 * If @sg happens to also be SMT, either choice is good.
++			 */
++			if (sds->busiest->flags & SD_SHARE_CPUCAPACITY)
++				return false;
++		}
++
+ 		break;
+ 
+ 	case group_has_spare:
+-- 
+2.39.2
+
+From 33b193a8846ec229414b71da7d26977fdfb3c9b3 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 20:58:34 -0800
+Subject: [PATCH] sched/fair: Use the prefer_sibling flag of the current sched
+ domain
+
+SD_PREFER_SIBLING is set from the SMT scheduling domain up to the first
+non-NUMA domain (the exception is systems with SD_ASYM_CPUCAPACITY).
+
+Above the SMT sched domain, all domains have a child. The SD_PREFER_SIBLING
+flag is always honored regardless of the scheduling domain at which the
+load balance takes place.
+
+There are cases, however, in which the busiest CPU's sched domain has a
+child but the destination CPU's does not. Consider, for instance, a non-SMT
+core (or an SMT core with only one online sibling) doing load balance with
+an SMT core at the MC level. SD_PREFER_SIBLING will not be honored. We are
+left with a fully busy SMT core and an idle non-SMT core.
+
+Avoid inconsistent behavior. Use the prefer_sibling behavior at the current
+scheduling domain, not its child.
+
+The NUMA sched domain does not have the SD_PREFER_SIBLING flag. Thus, we
+will not spread load among NUMA sched groups, as desired.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Suggested-by: Valentin Schneider <vschneid@redhat.com>
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Tested-by: Zhang Rui <rui.zhang@intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/fair.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index ed1f13fa32f8..9d94ba3f6726 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -9874,7 +9874,6 @@ static void update_idle_cpu_scan(struct lb_env *env,
+ 
+ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds)
+ {
+-	struct sched_domain *child = env->sd->child;
+ 	struct sched_group *sg = env->sd->groups;
+ 	struct sg_lb_stats *local = &sds->local_stat;
+ 	struct sg_lb_stats tmp_sgs;
+@@ -9915,9 +9914,11 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
+ 		sg = sg->next;
+ 	} while (sg != env->sd->groups);
+ 
+-	/* Tag domain that child domain prefers tasks go to siblings first */
+-	sds->prefer_sibling = child && child->flags & SD_PREFER_SIBLING;
+-
++	/*
++	 * Tag domain that @env::sd prefers to spread excess tasks among
++	 * sibling sched groups.
++	 */
++	sds->prefer_sibling = env->sd->flags & SD_PREFER_SIBLING;
+ 
+ 	if (env->sd->flags & SD_NUMA)
+ 		env->fbq_type = fbq_classify_group(&sds->busiest_stat);
+@@ -10216,7 +10217,6 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
+ 			goto out_balanced;
+ 	}
+ 
+-	/* Try to move all excess tasks to child's sibling domain */
+ 	if (sds.prefer_sibling && local->group_type == group_has_spare &&
+ 	    busiest->sum_nr_running > local->sum_nr_running + 1)
+ 		goto force_balance;
+-- 
+2.39.2
+
+From 3cada1dc1aaa1bdbbacb9973c3ed69851a9a8054 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 20:58:35 -0800
+Subject: [PATCH] sched/fair: Do not even the number of busy CPUs via
+ asym_packing
+
+Now that find_busiest_group() triggers load balancing between a fully_busy
+SMT2 core and an idle non-SMT core, it is no longer needed to force
+balancing via asym_packing. Use asym_packing only as intended: when there
+is a high-priority CPU that is idle.
+
+After this change, the same logic applies to SMT and non-SMT local groups.
+Simplify asym_smt_can_pull_tasks() accordingly.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Tested-by: Zhang Rui <rui.zhang@intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/fair.c | 37 +++++--------------------------------
+ 1 file changed, 5 insertions(+), 32 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 9d94ba3f6726..e5079ee882ff 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -9117,20 +9117,15 @@ group_type group_classify(unsigned int imbalance_pct,
+  * @sgs:	Load-balancing statistics of the candidate busiest group
+  * @sg:		The candidate busiest group
+  *
+- * Check the state of the SMT siblings of both @sds::local and @sg and decide
+- * if @dst_cpu can pull tasks.
++ * Check the state of the SMT siblings of @sg and decide if @dst_cpu can pull
++ * tasks.
+  *
+  * This function must be called only if all the SMT siblings of @dst_cpu are
+  * idle, if any.
+  *
+- * If @dst_cpu does not have SMT siblings, it can pull tasks if two or more of
+- * the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
+- * only if @dst_cpu has higher priority.
+- *
+- * If @dst_cpu has SMT siblings, decide based on the priority of @sg. Do it only
+- * if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
+- * imbalances in the number of busy CPUs will be dealt with in
+- * find_busiest_group().
++ * @dst_cpu can pull tasks if @sg has exactly one busy CPU (i.e., one more than
++ * @sds::local) and has lower group priority than @sds::local. Bigger imbalances
++ * in the number of busy CPUs will be dealt with in find_busiest_group().
+  *
+  * Return: true if @dst_cpu can pull tasks, false otherwise.
+  */
+@@ -9139,33 +9134,11 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
+ 				    struct sched_group *sg)
+ {
+ #ifdef CONFIG_SCHED_SMT
+-	bool local_is_smt;
+ 	int sg_busy_cpus;
+ 
+-	local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
+ 	sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
+ 
+-	if (!local_is_smt) {
+-		/*
+-		 * If we are here, @dst_cpu is idle and does not have SMT
+-		 * siblings. Pull tasks if candidate group has two or more
+-		 * busy CPUs.
+-		 */
+-		if (sg_busy_cpus >= 2) /* implies sg_is_smt */
+-			return true;
+-
+-		/*
+-		 * @dst_cpu does not have SMT siblings. @sg may have SMT
+-		 * siblings and only one is busy. In such case, @dst_cpu
+-		 * can help if it has higher priority and is idle (i.e.,
+-		 * it has no running tasks).
+-		 */
+-		return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
+-	}
+-
+ 	/*
+-	 * @dst_cpu has SMT siblings and they are also idle.
+-	 *
+ 	 * If the difference in the number of busy CPUs is two or more, let
+ 	 * find_busiest_group() take care of it. We only care if @sg has
+ 	 * exactly one busy CPU. This covers SMT and non-SMT sched groups.
+-- 
+2.39.2
+
+From 9502629c285b133622a66eafae6983fe717906cb Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 20:58:36 -0800
+Subject: [PATCH] sched/topology: Remove SHARED_CHILD from ASYM_PACKING
+
+Only x86 and Power7 use ASYM_PACKING. They use it differently.
+
+Power7 has cores of equal priority, but the SMT siblings of a core have
+different priorities. Parent scheduling domains do not need (nor have) the
+ASYM_PACKING flag. SHARED_CHILD is not needed. Using SHARED_PARENT would
+cause the topology debug code to complain.
+
+X86 has cores of different priority, but all the SMT siblings of the core
+have equal priority. It needs ASYM_PACKING at the MC level, but not at the
+SMT level (it also needs it at upper levels if they have scheduling groups
+of different priority). Removing ASYM_PACKING from the SMT domain causes
+the topology debug code to complain.
+
+Remove SHARED_CHILD for now. We still need a topology check that satisfies
+both architectures.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Suggested-by: Valentin Schneider <vschneid@redhat.com>
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Tested-by: Zhang Rui <rui.zhang@intel.com>
+Patchset: intel-thread-director
+---
+ include/linux/sched/sd_flags.h | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
+index 57bde66d95f7..800238854ba5 100644
+--- a/include/linux/sched/sd_flags.h
++++ b/include/linux/sched/sd_flags.h
+@@ -132,12 +132,9 @@ SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
+ /*
+  * Place busy tasks earlier in the domain
+  *
+- * SHARED_CHILD: Usually set on the SMT level. Technically could be set further
+- *               up, but currently assumed to be set from the base domain
+- *               upwards (see update_top_cache_domain()).
+  * NEEDS_GROUPS: Load balancing flag.
+  */
+-SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
++SD_FLAG(SD_ASYM_PACKING,  SDF_NEEDS_GROUPS)
+ 
+ /*
+  * Prefer to place tasks in a sibling domain
+-- 
+2.39.2
+
+From 503eed0aa6bc93d5bbae5c0ecb5dd98221ac70d3 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 20:58:37 -0800
+Subject: [PATCH] x86/sched: Remove SD_ASYM_PACKING from the SMT domain flags
+
+There is no difference between any of the SMT siblings of a physical core.
+Do not do asym_packing load balancing at this level.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Tested-by: Zhang Rui <rui.zhang@intel.com>
+Patchset: intel-thread-director
+---
+ arch/x86/kernel/smpboot.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index 55cad72715d9..0213d066a9a9 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -547,7 +547,7 @@ static int x86_core_flags(void)
+ #ifdef CONFIG_SCHED_SMT
+ static int x86_smt_flags(void)
+ {
+-	return cpu_smt_flags() | x86_sched_itmt_flags();
++	return cpu_smt_flags();
+ }
+ #endif
+ #ifdef CONFIG_SCHED_CLUSTER
+-- 
+2.39.2
+
+From 1344221f62b96498586051f3e2a6c1e9524eebf3 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 20:58:38 -0800
+Subject: [PATCH] x86/sched/itmt: Give all SMT siblings of a core the same
+ priority
+
+X86 does not have the SD_ASYM_PACKING flag in the SMT domain. The scheduler
+knows how to handle SMT and non-SMT cores of different priority. There is
+no reason for SMT siblings of a core to have different priorities.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Reviewed-by: Len Brown <len.brown@intel.com>
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Tested-by: Zhang Rui <rui.zhang@intel.com>
+Patchset: intel-thread-director
+---
+ arch/x86/kernel/itmt.c | 23 +++++------------------
+ 1 file changed, 5 insertions(+), 18 deletions(-)
+
+diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
+index 9ff480e94511..6510883c5e81 100644
+--- a/arch/x86/kernel/itmt.c
++++ b/arch/x86/kernel/itmt.c
+@@ -174,32 +174,19 @@ int arch_asym_cpu_priority(int cpu)
+ 
+ /**
+  * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
+- * @prio:	Priority of cpu core
+- * @core_cpu:	The cpu number associated with the core
++ * @prio:	Priority of @cpu
++ * @cpu:	The CPU number
+  *
+  * The pstate driver will find out the max boost frequency
+  * and call this function to set a priority proportional
+- * to the max boost frequency. CPU with higher boost
++ * to the max boost frequency. CPUs with higher boost
+  * frequency will receive higher priority.
+  *
+  * No need to rebuild sched domain after updating
+  * the CPU priorities. The sched domains have no
+  * dependency on CPU priorities.
+  */
+-void sched_set_itmt_core_prio(int prio, int core_cpu)
++void sched_set_itmt_core_prio(int prio, int cpu)
+ {
+-	int cpu, i = 1;
+-
+-	for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
+-		int smt_prio;
+-
+-		/*
+-		 * Ensure that the siblings are moved to the end
+-		 * of the priority chain and only used when
+-		 * all other high priority cpus are out of capacity.
+-		 */
+-		smt_prio = prio * smp_num_siblings / (i * i);
+-		per_cpu(sched_core_priority, cpu) = smt_prio;
+-		i++;
+-	}
++	per_cpu(sched_core_priority, cpu) = prio;
+ }
+-- 
+2.39.2
+
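A worked example of the change (the numbers are illustrative, not from the
patch): with a boost-derived prio of 100 and smp_num_siblings == 2, the
removed loop assigned prio * smp_num_siblings / (i * i), i.e. 100 * 2 / 1 =
200 to the first sibling and 100 * 2 / 4 = 50 to the second, so one sibling
always outranked the other. After this patch both siblings simply get
per_cpu(sched_core_priority, cpu) = 100.
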
+From 25de1f88b45889ab6b7d03acc4638c93f978e427 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:42 -0800
+Subject: [PATCH] sched/task_struct: Introduce IPC classes of tasks
+
+On hybrid processors, the architecture differences between the types of
+CPUs lead to different instructions-per-cycle (IPC) on each type of CPU.
+IPCs may differ further by the type of instructions. Instructions can be
+grouped into classes of similar IPCs.
+
+Hence, tasks can be classified into groups based on the type of
+instructions they execute.
+
+Add a new member task_struct::ipcc to associate a particular task to
+an IPC class that depends on the instructions it executes.
+
+The scheduler may use the IPC class of a task and data about the
+performance among CPUs of a given IPC class to improve throughput. It
+may, for instance, place certain classes of tasks on CPUs of higher
+performance.
+
+The methods to determine the classification of a task and its relative
+IPC score are specific to each CPU architecture.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ include/linux/sched.h | 10 ++++++++++
+ init/Kconfig          | 12 ++++++++++++
+ 2 files changed, 22 insertions(+)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 853d08f7562b..f29294217885 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -127,6 +127,8 @@ struct task_group;
+ 					 __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
+ 					 TASK_PARKED)
+ 
++#define IPC_CLASS_UNCLASSIFIED		0
++
+ #define task_is_running(task)		(READ_ONCE((task)->__state) == TASK_RUNNING)
+ 
+ #define task_is_traced(task)		((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0)
+@@ -1522,6 +1524,14 @@ struct task_struct {
+ 	union rv_task_monitor		rv[RV_PER_TASK_MONITORS];
+ #endif
+ 
++#ifdef CONFIG_IPC_CLASSES
++	/*
++	 * A hardware-defined classification of task that reflects but is
++	 * not identical to the number of instructions per cycle.
++	 */
++	unsigned short			ipcc;
++#endif
++
+ 	/*
+ 	 * New fields for task_struct should be added above here, so that
+ 	 * they are included in the randomized portion of task_struct.
+diff --git a/init/Kconfig b/init/Kconfig
+index 44e90b28a30f..24c5eec9d22e 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -867,6 +867,18 @@ config UCLAMP_BUCKETS_COUNT
+ 
+ 	  If in doubt, use the default value.
+ 
++config IPC_CLASSES
++	bool "IPC classes of tasks"
++	depends on SMP
++	help
++	  If selected, each task is assigned a classification value that
++	  reflects the type of instructions that the task executes. This
++	  classification reflects but is not equal to the number of
++	  instructions retired per cycle.
++
++	  The scheduler uses the classification value to improve the placement
++	  of tasks.
++
+ endmenu
+ 
+ #
+-- 
+2.39.2
+
+From a0e3326c33d45e7c433635bc1d620b086731c1cf Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:43 -0800
+Subject: [PATCH] sched: Add interfaces for IPC classes
+
+Add the interfaces that architectures shall implement to convey the data
+to support IPC classes.
+
+arch_update_ipcc() updates the IPC classification of the current task as
+given by hardware.
+
+arch_get_ipcc_score() provides a performance score for a given IPC class
+when placed on a specific CPU. Higher scores indicate higher performance.
+
+When a driver or equivalent enablement code has configured the necessary
+hardware to support IPC classes, it should call sched_enable_ipc_classes()
+to notify the scheduler that it can start using IPC classes data.
+
+The number of classes and the score of each class of task are determined
+by hardware.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ include/linux/sched/topology.h |  6 ++++
+ kernel/sched/sched.h           | 66 ++++++++++++++++++++++++++++++++++
+ kernel/sched/topology.c        |  9 +++++
+ 3 files changed, 81 insertions(+)
+
+diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
+index 816df6cc444e..5b084d3c9ad1 100644
+--- a/include/linux/sched/topology.h
++++ b/include/linux/sched/topology.h
+@@ -280,4 +280,10 @@ static inline int task_node(const struct task_struct *p)
+ 	return cpu_to_node(task_cpu(p));
+ }
+ 
++#ifdef CONFIG_IPC_CLASSES
++extern void sched_enable_ipc_classes(void);
++#else
++static inline void sched_enable_ipc_classes(void) { }
++#endif
++
+ #endif /* _LINUX_SCHED_TOPOLOGY_H */
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 771f8ddb7053..7ab65d3feaa1 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2526,6 +2526,72 @@ void arch_scale_freq_tick(void)
+ }
+ #endif
+ 
++#ifdef CONFIG_IPC_CLASSES
++DECLARE_STATIC_KEY_FALSE(sched_ipcc);
++
++static inline bool sched_ipcc_enabled(void)
++{
++	return static_branch_unlikely(&sched_ipcc);
++}
++
++#ifndef arch_update_ipcc
++/**
++ * arch_update_ipcc() - Update the IPC class of the current task
++ * @curr:		The current task
++ *
++ * Request that the IPC classification of @curr is updated.
++ *
++ * Returns: none
++ */
++static __always_inline
++void arch_update_ipcc(struct task_struct *curr)
++{
++}
++#endif
++
++#ifndef arch_get_ipcc_score
++
++#define SCHED_IPCC_SCORE_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)
++/**
++ * arch_get_ipcc_score() - Get the IPC score of a class of task
++ * @ipcc:	The IPC class
++ * @cpu:	A CPU number
++ *
++ * The IPC performance score reflects (but is not identical to) the number
++ * of instructions retired per cycle for a given IPC class. It is a linear and
++ * abstract metric. Higher scores reflect better performance.
++ *
++ * The IPC score can be normalized with respect to the class, i, with the
++ * highest IPC score on the CPU, c, with highest performance:
++ *
++ *            IPC(i, c)
++ *  ------------------------------------ * SCHED_IPCC_SCORE_SCALE
++ *     max(IPC(i, c) : (i, c))
++ *
++ * Scheduling schemes that want to use the IPC score along with other
++ * normalized metrics for scheduling (e.g., CPU capacity) may need to normalize
++ * it.
++ *
++ * Other scheduling schemes (e.g., asym_packing) do not need normalization.
++ *
++ * Returns the performance score of an IPC class, @ipcc, when running on @cpu.
++ * Error when either @ipcc or @cpu are invalid.
++ */
++static __always_inline
++unsigned long arch_get_ipcc_score(unsigned short ipcc, int cpu)
++{
++	return SCHED_IPCC_SCORE_SCALE;
++}
++#endif
++#else /* CONFIG_IPC_CLASSES */
++
++#define arch_get_ipcc_score(ipcc, cpu) (-EINVAL)
++#define arch_update_ipcc(curr)
++
++static inline bool sched_ipcc_enabled(void) { return false; }
++
++#endif /* CONFIG_IPC_CLASSES */
++
+ #ifndef arch_scale_freq_capacity
+ /**
+  * arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
+diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
+index 8739c2a5a54e..60e03d15f58c 100644
+--- a/kernel/sched/topology.c
++++ b/kernel/sched/topology.c
+@@ -670,6 +670,15 @@ DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
+ DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
+ DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
+ 
++#ifdef CONFIG_IPC_CLASSES
++DEFINE_STATIC_KEY_FALSE(sched_ipcc);
++
++void sched_enable_ipc_classes(void)
++{
++	static_branch_enable_cpuslocked(&sched_ipcc);
++}
++#endif
++
+ static void update_top_cache_domain(int cpu)
+ {
+ 	struct sched_domain_shared *sds = NULL;
+-- 
+2.39.2
+
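The normalization described in the arch_get_ipcc_score() comment can be
checked with a small userspace sketch (the IPC table and its values are made
up; only the formula mirrors the patch):

    #include <stdio.h>

    #define SCHED_IPCC_SCORE_SCALE (1L << 10)  /* 1 << SCHED_FIXEDPOINT_SHIFT */

    /* Hypothetical per-class IPC on two CPU types: {small core, big core}. */
    static const double ipc[2][2] = {
            { 1.0, 1.3 },   /* class 0 */
            { 1.1, 2.0 },   /* class 1 */
    };

    /* IPC(i, c) / max(IPC(i, c)) * SCHED_IPCC_SCORE_SCALE */
    static long ipcc_score(int cls, int cpu)
    {
            const double max_ipc = 2.0;     /* highest entry in the table */

            return (long)(ipc[cls][cpu] / max_ipc * SCHED_IPCC_SCORE_SCALE);
    }

    int main(void)
    {
            printf("class 1, big core:   %ld\n", ipcc_score(1, 1)); /* 1024 */
            printf("class 1, small core: %ld\n", ipcc_score(1, 0)); /* 563 */
            return 0;
    }
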
+From c18e80caa66e108ad250a79ee9688e07705830cf Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:44 -0800
+Subject: [PATCH] sched/core: Initialize the IPC class of a new task
+
+New tasks shall start life as unclassified. They will be classified by
+hardware when they run.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/core.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 2a4918a1faa9..325b1d3cf7a8 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4424,6 +4424,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
+ 	p->se.prev_sum_exec_runtime	= 0;
+ 	p->se.nr_migrations		= 0;
+ 	p->se.vruntime			= 0;
++#ifdef CONFIG_IPC_CLASSES
++	p->ipcc				= IPC_CLASS_UNCLASSIFIED;
++#endif
+ 	INIT_LIST_HEAD(&p->se.group_node);
+ 
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+-- 
+2.39.2
+
+From b98df1322d063aee5015bf6fc751cf612151183c Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:45 -0800
+Subject: [PATCH] sched/core: Add user_tick as argument to scheduler_tick()
+
+Differentiate between user and kernel ticks so that the scheduler updates
+the IPC class of the current task during the former.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ include/linux/sched.h | 2 +-
+ kernel/sched/core.c   | 2 +-
+ kernel/time/timer.c   | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index f29294217885..4f96c3dd59d0 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -293,7 +293,7 @@ enum {
+ 	TASK_COMM_LEN = 16,
+ };
+ 
+-extern void scheduler_tick(void);
++extern void scheduler_tick(bool user_tick);
+ 
+ #define	MAX_SCHEDULE_TIMEOUT		LONG_MAX
+ 
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 325b1d3cf7a8..b438fc79f868 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -5550,7 +5550,7 @@ static inline u64 cpu_resched_latency(struct rq *rq) { return 0; }
+  * This function gets called by the timer code, with HZ frequency.
+  * We call it with interrupts disabled.
+  */
+-void scheduler_tick(void)
++void scheduler_tick(bool user_tick)
+ {
+ 	int cpu = smp_processor_id();
+ 	struct rq *rq = cpu_rq(cpu);
+diff --git a/kernel/time/timer.c b/kernel/time/timer.c
+index 63a8ce7177dd..e15e24105891 100644
+--- a/kernel/time/timer.c
++++ b/kernel/time/timer.c
+@@ -2073,7 +2073,7 @@ void update_process_times(int user_tick)
+ 	if (in_irq())
+ 		irq_work_tick();
+ #endif
+-	scheduler_tick();
++	scheduler_tick(user_tick);
+ 	if (IS_ENABLED(CONFIG_POSIX_TIMERS))
+ 		run_posix_cpu_timers();
+ }
+-- 
+2.39.2
+
+From 736249a61b243746519f78008913237317180313 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:46 -0800
+Subject: [PATCH] sched/core: Update the IPC class of the current task
+
+When supported, hardware monitors the instruction stream to classify the
+current task. Hence, at userspace tick, we are ready to read the most
+recent classification result for the current task.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/core.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index b438fc79f868..0ab39cc055c7 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -5562,6 +5562,9 @@ void scheduler_tick(bool user_tick)
+ 	if (housekeeping_cpu(cpu, HK_TYPE_TICK))
+ 		arch_scale_freq_tick();
+ 
++	if (sched_ipcc_enabled() && user_tick)
++		arch_update_ipcc(curr);
++
+ 	sched_clock_tick();
+ 
+ 	rq_lock(rq, &rf);
+-- 
+2.39.2
+
+From e466ceec97170f0038327d9402d1a7287bdfda01 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:47 -0800
+Subject: [PATCH] sched/fair: Collect load-balancing stats for IPC classes
+
+When selecting a busiest scheduling group, the IPC class of the current
+task can be used to select between two scheduling groups of types
+asym_packing or fully_busy that are otherwise identical.
+
+Compute the IPC class performance score for a scheduling group. It
+is the sum of the scores of the current tasks of all the runqueues.
+
+Also, keep track of the class of the task with the lowest IPC class score
+in the scheduling group.
+
+These two metrics will be used during idle load balancing to compute the
+current and the prospective IPC class score of a scheduling group.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/fair.c | 61 +++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 61 insertions(+)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index e5079ee882ff..a418164953c3 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8767,6 +8767,11 @@ struct sg_lb_stats {
+ 	unsigned int nr_numa_running;
+ 	unsigned int nr_preferred_running;
+ #endif
++#ifdef CONFIG_IPC_CLASSES
++	unsigned long min_score; /* Min(score(rq->curr->ipcc)) */
++	unsigned short min_ipcc; /* Class of the task with the minimum IPCC score in the rq */
++	unsigned long sum_score; /* Sum(score(rq->curr->ipcc)) */
++#endif
+ };
+ 
+ /*
+@@ -9110,6 +9115,59 @@ group_type group_classify(unsigned int imbalance_pct,
+ 	return group_has_spare;
+ }
+ 
++#ifdef CONFIG_IPC_CLASSES
++static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
++{
++	/* All IPCC stats have been set to zero in update_sg_lb_stats(). */
++	sgs->min_score = ULONG_MAX;
++}
++
++/* Called only if cpu_of(@rq) is not idle and has tasks running. */
++static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
++				    struct rq *rq)
++{
++	struct task_struct *curr;
++	unsigned short ipcc;
++	unsigned long score;
++
++	if (!sched_ipcc_enabled())
++		return;
++
++	curr = rcu_dereference(rq->curr);
++	if (!curr || (curr->flags & PF_EXITING) || is_idle_task(curr) ||
++	    task_is_realtime(curr) ||
++	    !cpumask_test_cpu(dst_cpu, curr->cpus_ptr))
++		return;
++
++	ipcc = curr->ipcc;
++	score = arch_get_ipcc_score(ipcc, cpu_of(rq));
++
++	/*
++	 * Ignore tasks with invalid scores. When finding the busiest group, we
++	 * prefer those with higher sum_score. This group will not be selected.
++	 */
++	if (IS_ERR_VALUE(score))
++		return;
++
++	sgs->sum_score += score;
++
++	if (score < sgs->min_score) {
++		sgs->min_score = score;
++		sgs->min_ipcc = ipcc;
++	}
++}
++
++#else /* CONFIG_IPC_CLASSES */
++static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
++				    struct rq *rq)
++{
++}
++
++static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
++{
++}
++#endif /* CONFIG_IPC_CLASSES */
++
+ /**
+  * asym_smt_can_pull_tasks - Check whether the load balancing CPU can pull tasks
+  * @dst_cpu:	Destination CPU of the load balancing
+@@ -9202,6 +9260,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
+ 	int i, nr_running, local_group;
+ 
+ 	memset(sgs, 0, sizeof(*sgs));
++	init_rq_ipcc_stats(sgs);
+ 
+ 	local_group = group == sds->local;
+ 
+@@ -9251,6 +9310,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
+ 			if (sgs->group_misfit_task_load < load)
+ 				sgs->group_misfit_task_load = load;
+ 		}
++
++		update_sg_lb_ipcc_stats(env->dst_cpu, sgs, rq);
+ 	}
+ 
+ 	sgs->group_capacity = group->sgc->capacity;
+-- 
+2.39.2
+
+From 493a3d6568c0ae6aa677dbcaa4f623b03a5feae0 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:48 -0800
+Subject: [PATCH] sched/fair: Compute IPC class scores for load balancing
+
+Compute the joint total (both current and prospective) IPC class score of
+a scheduling group and the local scheduling group.
+
+These IPCC statistics are used during idle load balancing. The candidate
+scheduling group will have one fewer busy CPU after load balancing. This
+observation is important for cores with SMT support.
+
+The IPCC score of scheduling groups composed of SMT siblings needs to
+consider that the siblings share CPU resources. When computing the total
+IPCC score of the scheduling group, divide the score of each sibling by the
+number of busy siblings.
+
+Collect IPCC statistics for asym_packing and fully_busy scheduling groups.
+When picking a busiest group, they are used to break ties between otherwise
+identical groups.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/fair.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 68 insertions(+)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index a418164953c3..ae0c908be707 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8771,6 +8771,8 @@ struct sg_lb_stats {
+ 	unsigned long min_score; /* Min(score(rq->curr->ipcc)) */
+ 	unsigned short min_ipcc; /* Class of the task with the minimum IPCC score in the rq */
+ 	unsigned long sum_score; /* Sum(score(rq->curr->ipcc)) */
++	long ipcc_score_after; /* Prospective IPCC score after load balancing */
++	unsigned long ipcc_score_before; /* IPCC score before load balancing */
+ #endif
+ };
+ 
+@@ -9157,6 +9159,62 @@ static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
+ 	}
+ }
+ 
++static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
++				      struct sched_group *sg,
++				      struct lb_env *env)
++{
++	unsigned long score_on_dst_cpu, before;
++	int busy_cpus;
++	long after;
++
++	if (!sched_ipcc_enabled())
++		return;
++
++	/*
++	 * IPCC scores are only useful during idle load balancing. For now,
++	 * only asym_packing uses IPCC scores.
++	 */
++	if (!(env->sd->flags & SD_ASYM_PACKING) ||
++	    env->idle == CPU_NOT_IDLE)
++		return;
++
++	/*
++	 * IPCC scores are used to break ties only between these types of
++	 * groups.
++	 */
++	if (sgs->group_type != group_fully_busy &&
++	    sgs->group_type != group_asym_packing)
++		return;
++
++	busy_cpus = sgs->group_weight - sgs->idle_cpus;
++
++	/* No busy CPUs in the group. No tasks to move. */
++	if (!busy_cpus)
++		return;
++
++	score_on_dst_cpu = arch_get_ipcc_score(sgs->min_ipcc, env->dst_cpu);
++
++	/*
++	 * Do not use IPC scores. sgs::ipcc_score_{after, before} will be zero
++	 * and not used.
++	 */
++	if (IS_ERR_VALUE(score_on_dst_cpu))
++		return;
++
++	before = sgs->sum_score;
++	after = before - sgs->min_score;
++
++	/* SMT siblings share throughput. */
++	if (busy_cpus > 1 && sg->flags & SD_SHARE_CPUCAPACITY) {
++		before /= busy_cpus;
++		/* One sibling will become idle after load balance. */
++		after /= busy_cpus - 1;
++	}
++
++	sgs->ipcc_score_after = after + score_on_dst_cpu;
++	sgs->ipcc_score_before = before;
++}
++
+ #else /* CONFIG_IPC_CLASSES */
+ static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
+ 				    struct rq *rq)
+@@ -9166,6 +9224,13 @@ static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
+ static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
+ {
+ }
++
++static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
++				      struct sched_group *sg,
++				      struct lb_env *env)
++{
++}
++
+ #endif /* CONFIG_IPC_CLASSES */
+ 
+ /**
+@@ -9327,6 +9392,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
+ 
+ 	sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs);
+ 
++	if (!local_group)
++		update_sg_lb_stats_scores(sgs, group, env);
++
+ 	/* Computing avg_load makes sense only when group is overloaded */
+ 	if (sgs->group_type == group_overloaded)
+ 		sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) /
+-- 
+2.39.2
+
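A worked example of the prospective score computed above (numbers are
illustrative): take an SMT candidate group with two busy siblings,
sum_score = 1200, min_score = 500, and a score of 800 for that class on the
destination CPU. The siblings share throughput, so ipcc_score_before =
1200 / 2 = 600; after the balance one sibling becomes idle, so the remaining
score is (1200 - 500) / (2 - 1) = 700 and ipcc_score_after = 700 + 800 = 1500.
A group with a higher ipcc_score_after is preferred as busiest in the next
patch.
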
+From e93c0032e04663397da64d2fb501ddc3de9c961d Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:49 -0800
+Subject: [PATCH] sched/fair: Use IPCC stats to break ties between asym_packing
+ sched groups
+
+As it iterates, update_sd_pick_busiest() keeps on selecting as busiest
+sched groups of identical priority. Since both groups have the same
+priority, either group is a good choice. The IPCC statistics provide a
+measure of the throughput before and after load balance. Use them to
+pick a busiest scheduling group from otherwise identical asym_packing
+scheduling groups.
+
+Pick as busiest the scheduling group that yields a higher IPCC score
+after load balancing.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/fair.c | 72 +++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 72 insertions(+)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index ae0c908be707..cffb435e2b1c 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -9215,6 +9215,60 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
+ 	sgs->ipcc_score_before = before;
+ }
+ 
++/**
++ * sched_asym_ipcc_prefer - Select a sched group based on its IPCC score
++ * @a:	Load balancing statistics of a sched group
++ * @b:	Load balancing statistics of a second sched group
++ *
++ * Returns: true if @a has a higher IPCC score than @b after load balance.
++ * False otherwise.
++ */
++static bool sched_asym_ipcc_prefer(struct sg_lb_stats *a,
++				   struct sg_lb_stats *b)
++{
++	if (!sched_ipcc_enabled())
++		return false;
++
++	/* @a increases overall throughput after load balance. */
++	if (a->ipcc_score_after > b->ipcc_score_after)
++		return true;
++
++	/*
++	 * If @a and @b yield the same overall throughput, pick @a if
++	 * its current throughput is lower than that of @b.
++	 */
++	if (a->ipcc_score_after == b->ipcc_score_after)
++		return a->ipcc_score_before < b->ipcc_score_before;
++
++	return false;
++}
++
++/**
++ * sched_asym_ipcc_pick - Select a sched group based on its IPCC score
++ * @a:		A scheduling group
++ * @b:		A second scheduling group
++ * @a_stats:	Load balancing statistics of @a
++ * @b_stats:	Load balancing statistics of @b
++ *
++ * Returns: true if @a has the same priority and @a has tasks with IPC classes
++ * that yield higher overall throughput after load balance. False otherwise.
++ */
++static bool sched_asym_ipcc_pick(struct sched_group *a,
++				 struct sched_group *b,
++				 struct sg_lb_stats *a_stats,
++				 struct sg_lb_stats *b_stats)
++{
++	/*
++	 * Only use the class-specific preference selection if both sched
++	 * groups have the same priority.
++	 */
++	if (arch_asym_cpu_priority(a->asym_prefer_cpu) !=
++	    arch_asym_cpu_priority(b->asym_prefer_cpu))
++		return false;
++
++	return sched_asym_ipcc_prefer(a_stats, b_stats);
++}
++
+ #else /* CONFIG_IPC_CLASSES */
+ static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
+ 				    struct rq *rq)
+@@ -9231,6 +9285,14 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
+ {
+ }
+ 
++static bool sched_asym_ipcc_pick(struct sched_group *a,
++				 struct sched_group *b,
++				 struct sg_lb_stats *a_stats,
++				 struct sg_lb_stats *b_stats)
++{
++	return false;
++}
++
+ #endif /* CONFIG_IPC_CLASSES */
+ 
+ /**
+@@ -9466,6 +9528,16 @@ static bool update_sd_pick_busiest(struct lb_env *env,
+ 		/* Prefer to move from lowest priority CPU's work */
+ 		if (sched_asym_prefer(sg->asym_prefer_cpu, sds->busiest->asym_prefer_cpu))
+ 			return false;
++
++		/*
++		 * Unlike other callers of sched_asym_prefer(), here both @sg
++		 * and @sds::busiest have tasks running. When they have equal
++		 * priority, their IPC class scores can be used to select a
++		 * better busiest.
++		 */
++		if (sched_asym_ipcc_pick(sds->busiest, sg, &sds->busiest_stat, sgs))
++			return false;
++
+ 		break;
+ 
+ 	case group_misfit_task:
+-- 
+2.39.2
+
+From 6e3ab209c9551934abd38dedffa499ee7d7902d0 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:50 -0800
+Subject: [PATCH] sched/fair: Use IPCC stats to break ties between fully_busy
+ SMT groups
+
+IPCC statistics are used during idle load balancing. After balancing, one
+of the siblings of an SMT core will become idle. The rest of the busy
+siblings will enjoy increased throughput. The IPCC statistics provide
+a measure of the increased throughput. Use them to pick a busiest group
+from otherwise identical fully_busy scheduling groups (whose avg_load is
+equal - and zero).
+
+Using IPCC scores to break ties with non-SMT fully_busy sched groups
+is not necessary. SMT sched groups always need more help.
+
+Add a stub sched_asym_ipcc_prefer() for !CONFIG_IPC_CLASSES.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/fair.c | 23 ++++++++++++++++++++---
+ 1 file changed, 20 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index cffb435e2b1c..0996339df429 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -9285,6 +9285,12 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
+ {
+ }
+ 
++static bool sched_asym_ipcc_prefer(struct sg_lb_stats *a,
++				   struct sg_lb_stats *b)
++{
++	return false;
++}
++
+ static bool sched_asym_ipcc_pick(struct sched_group *a,
+ 				 struct sched_group *b,
+ 				 struct sg_lb_stats *a_stats,
+@@ -9568,10 +9574,21 @@ static bool update_sd_pick_busiest(struct lb_env *env,
+ 		if (sgs->avg_load == busiest->avg_load) {
+ 			/*
+ 			 * SMT sched groups need more help than non-SMT groups.
+-			 * If @sg happens to also be SMT, either choice is good.
+ 			 */
+-			if (sds->busiest->flags & SD_SHARE_CPUCAPACITY)
+-				return false;
++			if (sds->busiest->flags & SD_SHARE_CPUCAPACITY) {
++				if (!(sg->flags & SD_SHARE_CPUCAPACITY))
++					return false;
++
++				/*
++				 * Between two SMT groups, use IPCC scores to pick the
++				 * one that would improve throughput the most (only
++				 * asym_packing uses IPCC scores for now).
++				 */
++				if (sched_ipcc_enabled() &&
++				    env->sd->flags & SD_ASYM_PACKING &&
++				    sched_asym_ipcc_prefer(busiest, sgs))
++					return false;
++			}
+ 		}
+ 
+ 		break;
+-- 
+2.39.2
+
+From a293954b9b5f0b273e5acd5cbfa0ba0d70d9c139 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:51 -0800
+Subject: [PATCH] sched/fair: Use IPCC scores to select a busiest runqueue
+
+For two runqueues of equal priority and an equal number of running tasks,
+select the one whose current task would have the highest IPC class score
+if placed on the destination CPU.
+
+For now, use IPCC scores only for scheduling domains with the
+SD_ASYM_PACKING flag.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/fair.c | 64 +++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 64 insertions(+)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 0996339df429..a9a105092e7c 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -9269,6 +9269,37 @@ static bool sched_asym_ipcc_pick(struct sched_group *a,
+ 	return sched_asym_ipcc_prefer(a_stats, b_stats);
+ }
+ 
++/**
++ * ipcc_score_delta - Get the IPCC score delta wrt the load balance's dst_cpu
++ * @p:		A task
++ * @env:	Load balancing environment
++ *
++ * Returns: The IPCC score delta that @p would get if placed in the destination
++ * CPU of @env. LONG_MIN to indicate that the delta should not be used.
++ */
++static long ipcc_score_delta(struct task_struct *p, struct lb_env *env)
++{
++	unsigned long score_src, score_dst;
++	unsigned short ipcc = p->ipcc;
++
++	if (!sched_ipcc_enabled())
++		return LONG_MIN;
++
++	/* Only asym_packing uses IPCC scores at the moment. */
++	if (!(env->sd->flags & SD_ASYM_PACKING))
++		return LONG_MIN;
++
++	score_dst = arch_get_ipcc_score(ipcc, env->dst_cpu);
++	if (IS_ERR_VALUE(score_dst))
++		return LONG_MIN;
++
++	score_src = arch_get_ipcc_score(ipcc, task_cpu(p));
++	if (IS_ERR_VALUE(score_src))
++		return LONG_MIN;
++
++	return score_dst - score_src;
++}
++
+ #else /* CONFIG_IPC_CLASSES */
+ static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
+ 				    struct rq *rq)
+@@ -9299,6 +9330,11 @@ static bool sched_asym_ipcc_pick(struct sched_group *a,
+ 	return false;
+ }
+ 
++static long ipcc_score_delta(struct task_struct *p, struct lb_env *env)
++{
++	return LONG_MIN;
++}
++
+ #endif /* CONFIG_IPC_CLASSES */
+ 
+ /**
+@@ -10459,6 +10495,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
+ {
+ 	struct rq *busiest = NULL, *rq;
+ 	unsigned long busiest_util = 0, busiest_load = 0, busiest_capacity = 1;
++	long busiest_ipcc_delta = LONG_MIN;
+ 	unsigned int busiest_nr = 0;
+ 	int i;
+ 
+@@ -10575,8 +10612,35 @@ static struct rq *find_busiest_queue(struct lb_env *env,
+ 
+ 		case migrate_task:
+ 			if (busiest_nr < nr_running) {
++				struct task_struct *curr;
++
+ 				busiest_nr = nr_running;
+ 				busiest = rq;
++
++				/*
++				 * Remember the IPCC score delta of busiest::curr.
++				 * We may need it to break a tie with other queues
++				 * with equal nr_running.
++				 */
++				curr = rcu_dereference(busiest->curr);
++				busiest_ipcc_delta = ipcc_score_delta(curr, env);
++			/*
++			 * If rq and busiest have the same number of running
++			 * tasks and IPC classes are supported, pick rq if doing
++			 * so would give rq::curr a bigger IPC boost on dst_cpu.
++			 */
++			} else if (busiest_nr == nr_running) {
++				struct task_struct *curr;
++				long delta;
++
++				curr = rcu_dereference(rq->curr);
++				delta = ipcc_score_delta(curr, env);
++
++				if (busiest_ipcc_delta < delta) {
++					busiest_ipcc_delta = delta;
++					busiest_nr = nr_running;
++					busiest = rq;
++				}
+ 			}
+ 			break;
+ 
+-- 
+2.39.2
+
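The migrate_task tie-break added to find_busiest_queue() above boils down to keeping the runqueue whose current task gains the most from moving to dst_cpu. A small userspace model with an invented score table (the kernel obtains these values from arch_get_ipcc_score()):

#include <stdio.h>
#include <limits.h>

/* One entry per candidate runqueue; all of them have the same nr_running. */
struct rq_model {
	int cpu;
	long score_src;		/* IPCC score of rq->curr on its own CPU */
	long score_dst;		/* IPCC score of rq->curr on dst_cpu */
};

int main(void)
{
	struct rq_model rqs[] = {
		{ .cpu = 2, .score_src = 180, .score_dst = 200 },
		{ .cpu = 4, .score_src = 150, .score_dst = 240 },
		{ .cpu = 6, .score_src = 210, .score_dst = 210 },
	};
	long best_delta = LONG_MIN;
	int busiest = -1;

	for (int i = 0; i < 3; i++) {
		long delta = rqs[i].score_dst - rqs[i].score_src;

		/* Same nr_running everywhere: break the tie on the IPCC delta. */
		if (delta > best_delta) {
			best_delta = delta;
			busiest = rqs[i].cpu;
		}
	}

	printf("picked CPU %d (delta %ld)\n", busiest, best_delta);	/* CPU 4, 90 */
	return 0;
}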
+From 8c517b81e0894d90b440d862bc1704259a94cf46 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:52 -0800
+Subject: [PATCH] thermal: intel: hfi: Introduce Intel Thread Director classes
+
+On Intel hybrid parts, each type of CPU has specific performance and
+energy efficiency capabilities. The Intel Thread Director technology
+extends the Hardware Feedback Interface (HFI) to provide performance and
+energy efficiency data for advanced classes of instructions.
+
+Add support to parse per-class capabilities.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ drivers/thermal/intel/intel_hfi.c | 30 ++++++++++++++++++++++++------
+ 1 file changed, 24 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
+index 6e604bda2b93..2527ae3836c7 100644
+--- a/drivers/thermal/intel/intel_hfi.c
++++ b/drivers/thermal/intel/intel_hfi.c
+@@ -77,7 +77,7 @@ union cpuid6_edx {
+  * @ee_cap:		Energy efficiency capability
+  *
+  * Capabilities of a logical processor in the HFI table. These capabilities are
+- * unitless.
++ * unitless and specific to each HFI class.
+  */
+ struct hfi_cpu_data {
+ 	u8	perf_cap;
+@@ -89,7 +89,8 @@ struct hfi_cpu_data {
+  * @perf_updated:	Hardware updated performance capabilities
+  * @ee_updated:		Hardware updated energy efficiency capabilities
+  *
+- * Properties of the data in an HFI table.
++ * Properties of the data in an HFI table. There exists one header per each
++ * HFI class.
+  */
+ struct hfi_hdr {
+ 	u8	perf_updated;
+@@ -127,16 +128,21 @@ struct hfi_instance {
+ 
+ /**
+  * struct hfi_features - Supported HFI features
++ * @nr_classes:		Number of classes supported
+  * @nr_table_pages:	Size of the HFI table in 4KB pages
+  * @cpu_stride:		Stride size to locate the capability data of a logical
+  *			processor within the table (i.e., row stride)
++ * @class_stride:	Stride size to locate a class within the capability
++ *			data of a logical processor or the HFI table header
+  * @hdr_size:		Size of the table header
+  *
+  * Parameters and supported features that are common to all HFI instances
+  */
+ struct hfi_features {
++	unsigned int	nr_classes;
+ 	size_t		nr_table_pages;
+ 	unsigned int	cpu_stride;
++	unsigned int	class_stride;
+ 	unsigned int	hdr_size;
+ };
+ 
+@@ -333,8 +339,8 @@ static void init_hfi_cpu_index(struct hfi_cpu_info *info)
+ }
+ 
+ /*
+- * The format of the HFI table depends on the number of capabilities that the
+- * hardware supports. Keep a data structure to navigate the table.
++ * The format of the HFI table depends on the number of capabilities and classes
++ * that the hardware supports. Keep a data structure to navigate the table.
+  */
+ static void init_hfi_instance(struct hfi_instance *hfi_instance)
+ {
+@@ -515,18 +521,30 @@ static __init int hfi_parse_features(void)
+ 	/* The number of 4KB pages required by the table */
+ 	hfi_features.nr_table_pages = edx.split.table_pages + 1;
+ 
++	/*
++	 * Capability fields of an HFI class are grouped together. Classes are
++	 * contiguous in memory.  Hence, use the number of supported features to
++	 * locate a specific class.
++	 */
++	hfi_features.class_stride = nr_capabilities;
++
++	/* For now, use only one class of the HFI table */
++	hfi_features.nr_classes = 1;
++
+ 	/*
+ 	 * The header contains change indications for each supported feature.
+ 	 * The size of the table header is rounded up to be a multiple of 8
+ 	 * bytes.
+ 	 */
+-	hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
++	hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities *
++					     hfi_features.nr_classes, 8) * 8;
+ 
+ 	/*
+ 	 * Data of each logical processor is also rounded up to be a multiple
+ 	 * of 8 bytes.
+ 	 */
+-	hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
++	hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities *
++					       hfi_features.nr_classes, 8) * 8;
+ 
+ 	return 0;
+ }
+-- 
+2.39.2
+
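The stride arithmetic introduced above (class_stride, hdr_size, cpu_stride) is easiest to see with concrete numbers. A standalone sketch assuming a hypothetical table with 2 capability fields per class and 4 classes (the real counts come from CPUID leaf 6):

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int nr_capabilities = 2;	/* e.g. perf_cap + ee_cap (assumed) */
	unsigned int nr_classes = 4;		/* assumed ITD class count */

	/* Capability fields of one class are contiguous; classes follow each other. */
	unsigned int class_stride = nr_capabilities;
	/* Header and per-CPU rows are rounded up to a multiple of 8 bytes. */
	unsigned int hdr_size = DIV_ROUND_UP(nr_capabilities * nr_classes, 8) * 8;
	unsigned int cpu_stride = DIV_ROUND_UP(nr_capabilities * nr_classes, 8) * 8;

	printf("class_stride = %u\n", class_stride);		/* 2 */
	printf("hdr_size     = %u bytes\n", hdr_size);		/* 8 */
	printf("cpu_stride   = %u bytes\n", cpu_stride);	/* 8 */
	return 0;
}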
+From 258fdd38eadf1a4b1cff687dcc99a834ca97095f Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:53 -0800
+Subject: [PATCH] x86/cpufeatures: Add the Intel Thread Director feature
+ definitions
+
+Intel Thread Director (ITD) provides hardware resources to classify
+the current task. The classification reflects the type of instructions that
+a task currently executes.
+
+ITD extends the Hardware Feedback Interface table to provide performance
+and energy efficiency capabilities for each of the supported classes of
+tasks.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ arch/x86/include/asm/cpufeatures.h       | 1 +
+ arch/x86/include/asm/disabled-features.h | 8 +++++++-
+ arch/x86/kernel/cpu/cpuid-deps.c         | 1 +
+ 3 files changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index 8f39c46197b8..a2f2730737ae 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -345,6 +345,7 @@
+ #define X86_FEATURE_HWP_EPP		(14*32+10) /* HWP Energy Perf. Preference */
+ #define X86_FEATURE_HWP_PKG_REQ		(14*32+11) /* HWP Package Level Request */
+ #define X86_FEATURE_HFI			(14*32+19) /* Hardware Feedback Interface */
++#define X86_FEATURE_ITD			(14*32+23) /* Intel Thread Director */
+ 
+ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
+ #define X86_FEATURE_NPT			(15*32+ 0) /* Nested Page Table support */
+diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
+index c44b56f7ffba..0edd9bef7f2e 100644
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -99,6 +99,12 @@
+ # define DISABLE_TDX_GUEST	(1 << (X86_FEATURE_TDX_GUEST & 31))
+ #endif
+ 
++#ifdef CONFIG_IPC_CLASSES
++# define DISABLE_ITD	0
++#else
++# define DISABLE_ITD	(1 << (X86_FEATURE_ITD & 31))
++#endif
++
+ /*
+  * Make sure to add features to the correct mask
+  */
+@@ -117,7 +123,7 @@
+ 			 DISABLE_CALL_DEPTH_TRACKING)
+ #define DISABLED_MASK12	0
+ #define DISABLED_MASK13	0
+-#define DISABLED_MASK14	0
++#define DISABLED_MASK14	(DISABLE_ITD)
+ #define DISABLED_MASK15	0
+ #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
+ 			 DISABLE_ENQCMD)
+diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
+index d95221117129..277f157e067e 100644
+--- a/arch/x86/kernel/cpu/cpuid-deps.c
++++ b/arch/x86/kernel/cpu/cpuid-deps.c
+@@ -79,6 +79,7 @@ static const struct cpuid_dep cpuid_deps[] = {
+ 	{ X86_FEATURE_XFD,			X86_FEATURE_XSAVES    },
+ 	{ X86_FEATURE_XFD,			X86_FEATURE_XGETBV1   },
+ 	{ X86_FEATURE_AMX_TILE,			X86_FEATURE_XFD       },
++	{ X86_FEATURE_ITD,			X86_FEATURE_HFI       },
+ 	{}
+ };
+ 
+-- 
+2.39.2
+
+From b2c8d8d2cf45125c1b3be140385979a1cadcc4ca Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:54 -0800
+Subject: [PATCH] thermal: intel: hfi: Store per-CPU IPCC scores
+
+The scheduler reads the IPCC scores when balancing load. These reads can
+be quite frequent. Hardware can also update the HFI table frequently.
+Concurrent access may cause a lot of lock contention. It gets worse as the
+number of CPUs increases.
+
+Instead, create separate per-CPU IPCC scores that the scheduler can read
+without the HFI table lock.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ drivers/thermal/intel/intel_hfi.c | 46 +++++++++++++++++++++++++++++++
+ 1 file changed, 46 insertions(+)
+
+diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
+index 2527ae3836c7..b06021828892 100644
+--- a/drivers/thermal/intel/intel_hfi.c
++++ b/drivers/thermal/intel/intel_hfi.c
+@@ -29,6 +29,7 @@
+ #include <linux/kernel.h>
+ #include <linux/math.h>
+ #include <linux/mutex.h>
++#include <linux/percpu.h>
+ #include <linux/percpu-defs.h>
+ #include <linux/printk.h>
+ #include <linux/processor.h>
+@@ -170,6 +171,43 @@ static struct workqueue_struct *hfi_updates_wq;
+ #define HFI_UPDATE_INTERVAL		HZ
+ #define HFI_MAX_THERM_NOTIFY_COUNT	16
+ 
++#ifdef CONFIG_IPC_CLASSES
++static int __percpu *hfi_ipcc_scores;
++
++static int alloc_hfi_ipcc_scores(void)
++{
++	if (!cpu_feature_enabled(X86_FEATURE_ITD))
++		return 0;
++
++	hfi_ipcc_scores = __alloc_percpu(sizeof(*hfi_ipcc_scores) *
++					 hfi_features.nr_classes,
++					 sizeof(*hfi_ipcc_scores));
++
++	return !hfi_ipcc_scores;
++}
++
++static void set_hfi_ipcc_score(void *caps, int cpu)
++{
++	int i, *hfi_class;
++
++	if (!cpu_feature_enabled(X86_FEATURE_ITD))
++		return;
++
++	hfi_class = per_cpu_ptr(hfi_ipcc_scores, cpu);
++
++	for (i = 0;  i < hfi_features.nr_classes; i++) {
++		struct hfi_cpu_data *class_caps;
++
++		class_caps = caps + i * hfi_features.class_stride;
++		WRITE_ONCE(hfi_class[i], class_caps->perf_cap);
++	}
++}
++
++#else
++static int alloc_hfi_ipcc_scores(void) { return 0; }
++static void set_hfi_ipcc_score(void *caps, int cpu) { }
++#endif /* CONFIG_IPC_CLASSES */
++
+ static void get_hfi_caps(struct hfi_instance *hfi_instance,
+ 			 struct thermal_genl_cpu_caps *cpu_caps)
+ {
+@@ -192,6 +230,8 @@ static void get_hfi_caps(struct hfi_instance *hfi_instance,
+ 		cpu_caps[i].efficiency = caps->ee_cap << 2;
+ 
+ 		++i;
++
++		set_hfi_ipcc_score(caps, cpu);
+ 	}
+ 	raw_spin_unlock_irq(&hfi_instance->table_lock);
+ }
+@@ -580,8 +620,14 @@ void __init intel_hfi_init(void)
+ 	if (!hfi_updates_wq)
+ 		goto err_nomem;
+ 
++	if (alloc_hfi_ipcc_scores())
++		goto err_ipcc;
++
+ 	return;
+ 
++err_ipcc:
++	destroy_workqueue(hfi_updates_wq);
++
+ err_nomem:
+ 	for (j = 0; j < i; ++j) {
+ 		hfi_instance = &hfi_instances[j];
+-- 
+2.39.2
+
+From 55930531b4e99582a7b9969e810178c0317f196a Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:55 -0800
+Subject: [PATCH] thermal: intel: hfi: Update the IPC class of the current task
+
+Use Intel Thread Director classification to update the IPC class of a
+task. Implement the arch_update_ipcc() interface of the scheduler.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ arch/x86/include/asm/topology.h   |  6 ++++++
+ drivers/thermal/intel/intel_hfi.c | 32 +++++++++++++++++++++++++++++++
+ 2 files changed, 38 insertions(+)
+
+diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
+index 458c891a8273..ffcdac3f398f 100644
+--- a/arch/x86/include/asm/topology.h
++++ b/arch/x86/include/asm/topology.h
+@@ -227,4 +227,10 @@ void init_freq_invariance_cppc(void);
+ #define arch_init_invariance_cppc init_freq_invariance_cppc
+ #endif
+ 
++#if defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL)
++void intel_hfi_update_ipcc(struct task_struct *curr);
++
++#define arch_update_ipcc intel_hfi_update_ipcc
++#endif /* defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL) */
++
+ #endif /* _ASM_X86_TOPOLOGY_H */
+diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
+index b06021828892..530dcf57e06e 100644
+--- a/drivers/thermal/intel/intel_hfi.c
++++ b/drivers/thermal/intel/intel_hfi.c
+@@ -72,6 +72,17 @@ union cpuid6_edx {
+ 	u32 full;
+ };
+ 
++#ifdef CONFIG_IPC_CLASSES
++union hfi_thread_feedback_char_msr {
++	struct {
++		u64	classid : 8;
++		u64	__reserved : 55;
++		u64	valid : 1;
++	} split;
++	u64 full;
++};
++#endif
++
+ /**
+  * struct hfi_cpu_data - HFI capabilities per CPU
+  * @perf_cap:		Performance capability
+@@ -174,6 +185,27 @@ static struct workqueue_struct *hfi_updates_wq;
+ #ifdef CONFIG_IPC_CLASSES
+ static int __percpu *hfi_ipcc_scores;
+ 
++void intel_hfi_update_ipcc(struct task_struct *curr)
++{
++	union hfi_thread_feedback_char_msr msr;
++
++	/* We should not be here if ITD is not supported. */
++	if (!cpu_feature_enabled(X86_FEATURE_ITD)) {
++		pr_warn_once("task classification requested but not supported!");
++		return;
++	}
++
++	rdmsrl(MSR_IA32_HW_FEEDBACK_CHAR, msr.full);
++	if (!msr.split.valid)
++		return;
++
++	/*
++	 * 0 is a valid classification for Intel Thread Director. A scheduler
++	 * IPCC class of 0 means that the task is unclassified. Adjust.
++	 */
++	curr->ipcc = msr.split.classid + 1;
++}
++
+ static int alloc_hfi_ipcc_scores(void)
+ {
+ 	if (!cpu_feature_enabled(X86_FEATURE_ITD))
+-- 
+2.39.2
+
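The classid-to-IPCC adjustment in intel_hfi_update_ipcc() above (hardware class N becomes scheduler class N + 1, since 0 means unclassified) can be checked with the same bit-field layout. A sketch that substitutes a made-up constant for the rdmsrl() read:

#include <stdio.h>
#include <stdint.h>

/* Mirrors union hfi_thread_feedback_char_msr from the patch. */
union thread_feedback_char {
	struct {
		uint64_t classid    : 8;
		uint64_t __reserved : 55;
		uint64_t valid      : 1;
	} split;
	uint64_t full;
};

int main(void)
{
	/* Pretend MSR_IA32_HW_FEEDBACK_CHAR read back valid=1, classid=2. */
	union thread_feedback_char msr = { .full = (1ULL << 63) | 2 };

	if (!msr.split.valid) {
		puts("classification not valid; task class left unchanged");
		return 0;
	}

	/* HFI classes 0..N-1 map to scheduler IPCC 1..N. */
	unsigned short ipcc = (unsigned short)(msr.split.classid + 1);

	printf("hardware class %u -> scheduler ipcc %u\n",
	       (unsigned int)msr.split.classid, ipcc);	/* 2 -> 3 */
	return 0;
}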
+From 3ace3fa2778cce8d16caec8e828145b4dc7f2532 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:56 -0800
+Subject: [PATCH] thermal: intel: hfi: Report the IPC class score of a CPU
+
+Implement the arch_get_ipcc_score() interface of the scheduler. Use the
+performance capabilities of the extended Hardware Feedback Interface table
+as the IPC score.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ arch/x86/include/asm/topology.h   |  2 ++
+ drivers/thermal/intel/intel_hfi.c | 27 +++++++++++++++++++++++++++
+ 2 files changed, 29 insertions(+)
+
+diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
+index ffcdac3f398f..c4fcd9c3c634 100644
+--- a/arch/x86/include/asm/topology.h
++++ b/arch/x86/include/asm/topology.h
+@@ -229,8 +229,10 @@ void init_freq_invariance_cppc(void);
+ 
+ #if defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL)
+ void intel_hfi_update_ipcc(struct task_struct *curr);
++unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu);
+ 
+ #define arch_update_ipcc intel_hfi_update_ipcc
++#define arch_get_ipcc_score intel_hfi_get_ipcc_score
+ #endif /* defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL) */
+ 
+ #endif /* _ASM_X86_TOPOLOGY_H */
+diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
+index 530dcf57e06e..fa9b4a678d92 100644
+--- a/drivers/thermal/intel/intel_hfi.c
++++ b/drivers/thermal/intel/intel_hfi.c
+@@ -206,6 +206,33 @@ void intel_hfi_update_ipcc(struct task_struct *curr)
+ 	curr->ipcc = msr.split.classid + 1;
+ }
+ 
++unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
++{
++	unsigned short hfi_class;
++	int *scores;
++
++	if (cpu < 0 || cpu >= nr_cpu_ids)
++		return -EINVAL;
++
++	if (ipcc == IPC_CLASS_UNCLASSIFIED)
++		return -EINVAL;
++
++	/*
++	 * Scheduler IPC classes start at 1. HFI classes start at 0.
++	 * See note intel_hfi_update_ipcc().
++	 */
++	hfi_class = ipcc - 1;
++
++	if (hfi_class >= hfi_features.nr_classes)
++		return -EINVAL;
++
++	scores = per_cpu_ptr(hfi_ipcc_scores, cpu);
++	if (!scores)
++		return -ENODEV;
++
++	return READ_ONCE(scores[hfi_class]);
++}
++
+ static int alloc_hfi_ipcc_scores(void)
+ {
+ 	if (!cpu_feature_enabled(X86_FEATURE_ITD))
+-- 
+2.39.2
+
+From 7637b8a5d201d49ef56d31f22af30531d0193538 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:57 -0800
+Subject: [PATCH] thermal: intel: hfi: Define a default class for unclassified
+ tasks
+
+A task may be unclassified if it has been recently created, spend most of
+its lifetime sleeping, or hardware has not provided a classification.
+
+Most tasks will be eventually classified as scheduler's IPC class 1
+(HFI class 0). This class corresponds to the capabilities in the legacy,
+classless, HFI table.
+
+IPC class 1 is a reasonable choice until hardware provides an actual
+classification. Meanwhile, the scheduler will place classes of tasks with
+higher IPC scores on higher-performance CPUs.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ drivers/thermal/intel/intel_hfi.c | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
+index fa9b4a678d92..7ea6acce7107 100644
+--- a/drivers/thermal/intel/intel_hfi.c
++++ b/drivers/thermal/intel/intel_hfi.c
+@@ -185,6 +185,19 @@ static struct workqueue_struct *hfi_updates_wq;
+ #ifdef CONFIG_IPC_CLASSES
+ static int __percpu *hfi_ipcc_scores;
+ 
++/*
++ * A task may be unclassified if it has been recently created, spends most of
++ * its lifetime sleeping, or hardware has not provided a classification.
++ *
++ * Most tasks will be classified as scheduler's IPC class 1 (HFI class 0)
++ * eventually. Meanwhile, the scheduler will place classes of tasks with higher
++ * IPC scores on higher-performance CPUs.
++ *
++ * IPC class 1 is a reasonable choice. It matches the performance capability
++ * of the legacy, classless, HFI table.
++ */
++#define HFI_UNCLASSIFIED_DEFAULT 1
++
+ void intel_hfi_update_ipcc(struct task_struct *curr)
+ {
+ 	union hfi_thread_feedback_char_msr msr;
+@@ -215,7 +228,7 @@ unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
+ 		return -EINVAL;
+ 
+ 	if (ipcc == IPC_CLASS_UNCLASSIFIED)
+-		return -EINVAL;
++		ipcc = HFI_UNCLASSIFIED_DEFAULT;
+ 
+ 	/*
+ 	 * Scheduler IPC classes start at 1. HFI classes start at 0.
+-- 
+2.39.2
+
+From 9ddcae3ee191e5e27247d7ea9456d768919ac21f Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:58 -0800
+Subject: [PATCH] thermal: intel: hfi: Enable the Intel Thread Director
+
+Enable Intel Thread Director from the CPU hotplug callback: globally from
+CPU0 and then enable the thread-classification hardware in each logical
+processor individually.
+
+Also, initialize the number of classes supported.
+
+Let the scheduler know that it can start using IPC classes.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ arch/x86/include/asm/msr-index.h  |  2 ++
+ drivers/thermal/intel/intel_hfi.c | 40 +++++++++++++++++++++++++++++--
+ 2 files changed, 40 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index d3fe82c5d6b6..d83437d3473d 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -1095,6 +1095,8 @@
+ /* Hardware Feedback Interface */
+ #define MSR_IA32_HW_FEEDBACK_PTR        0x17d0
+ #define MSR_IA32_HW_FEEDBACK_CONFIG     0x17d1
++#define MSR_IA32_HW_FEEDBACK_THREAD_CONFIG 0x17d4
++#define MSR_IA32_HW_FEEDBACK_CHAR	0x17d2
+ 
+ /* x2APIC locked status */
+ #define MSR_IA32_XAPIC_DISABLE_STATUS	0xBD
+diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
+index 7ea6acce7107..35d947f47550 100644
+--- a/drivers/thermal/intel/intel_hfi.c
++++ b/drivers/thermal/intel/intel_hfi.c
+@@ -48,6 +48,8 @@
+ /* Hardware Feedback Interface MSR configuration bits */
+ #define HW_FEEDBACK_PTR_VALID_BIT		BIT(0)
+ #define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT	BIT(0)
++#define HW_FEEDBACK_CONFIG_ITD_ENABLE_BIT	BIT(1)
++#define HW_FEEDBACK_THREAD_CONFIG_ENABLE_BIT	BIT(0)
+ 
+ /* CPUID detection and enumeration definitions for HFI */
+ 
+@@ -72,6 +74,15 @@ union cpuid6_edx {
+ 	u32 full;
+ };
+ 
++union cpuid6_ecx {
++	struct {
++		u32	dont_care0:8;
++		u32	nr_classes:8;
++		u32	dont_care1:16;
++	} split;
++	u32 full;
++};
++
+ #ifdef CONFIG_IPC_CLASSES
+ union hfi_thread_feedback_char_msr {
+ 	struct {
+@@ -506,6 +517,11 @@ void intel_hfi_online(unsigned int cpu)
+ 
+ 	init_hfi_cpu_index(info);
+ 
++	if (cpu_feature_enabled(X86_FEATURE_ITD)) {
++		msr_val = HW_FEEDBACK_THREAD_CONFIG_ENABLE_BIT;
++		wrmsrl(MSR_IA32_HW_FEEDBACK_THREAD_CONFIG, msr_val);
++	}
++
+ 	/*
+ 	 * Now check if the HFI instance of the package/die of @cpu has been
+ 	 * initialized (by checking its header). In such case, all we have to
+@@ -561,8 +577,22 @@ void intel_hfi_online(unsigned int cpu)
+ 	 */
+ 	rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ 	msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
++
++	if (cpu_feature_enabled(X86_FEATURE_ITD))
++		msr_val |= HW_FEEDBACK_CONFIG_ITD_ENABLE_BIT;
++
+ 	wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ 
++	/*
++	 * We have all we need to support IPC classes. Task classification is
++	 * now working.
++	 *
++	 * All class scores are zero until after the first HFI update. That is
++	 * OK. The scheduler queries these scores at every load balance.
++	 */
++	if (cpu_feature_enabled(X86_FEATURE_ITD))
++		sched_enable_ipc_classes();
++
+ unlock:
+ 	mutex_unlock(&hfi_instance_lock);
+ 	return;
+@@ -640,8 +670,14 @@ static __init int hfi_parse_features(void)
+ 	 */
+ 	hfi_features.class_stride = nr_capabilities;
+ 
+-	/* For now, use only one class of the HFI table */
+-	hfi_features.nr_classes = 1;
++	if (cpu_feature_enabled(X86_FEATURE_ITD)) {
++		union cpuid6_ecx ecx;
++
++		ecx.full = cpuid_ecx(CPUID_HFI_LEAF);
++		hfi_features.nr_classes = ecx.split.nr_classes;
++	} else {
++		hfi_features.nr_classes = 1;
++	}
+ 
+ 	/*
+ 	 * The header contains change indications for each supported feature.
+-- 
+2.39.2
+
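The union cpuid6_ecx added above pulls the ITD class count out of bits 15:8 of CPUID(6).ECX; the enable path then sets HW_FEEDBACK_THREAD_CONFIG_ENABLE_BIT per CPU and the ITD bit in the package-level config MSR. A tiny decode of a hypothetical raw ECX value:

#include <stdio.h>
#include <stdint.h>

/* Same layout as union cpuid6_ecx in the patch. */
union cpuid6_ecx {
	struct {
		uint32_t dont_care0 : 8;
		uint32_t nr_classes : 8;
		uint32_t dont_care1 : 16;
	} split;
	uint32_t full;
};

int main(void)
{
	union cpuid6_ecx ecx = { .full = 0x00000401 };	/* invented CPUID value */

	printf("nr_classes = %u\n", ecx.split.nr_classes);	/* bits 15:8 -> 4 */
	return 0;
}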
+From aeb2e2fb157001cdd6c10d261fe006c8aa22bf06 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:10:59 -0800
+Subject: [PATCH] sched/task_struct: Add helpers for IPC classification
+
+The unprocessed classification that hardware provides for a task may not
+be usable by the scheduler: the classification may change too frequently or
+architectures may want to consider extra factors. For instance, some
+processors with Intel Thread Director need to consider the state of the SMT
+siblings of a core.
+
+Provide per-task helper variables that architectures can use to post-
+process the classification that hardware provides.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ include/linux/sched.h | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 4f96c3dd59d0..582e14cf3f76 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1529,7 +1529,17 @@ struct task_struct {
+ 	 * A hardware-defined classification of task that reflects but is
+ 	 * not identical to the number of instructions per cycle.
+ 	 */
+-	unsigned short			ipcc;
++	unsigned int			ipcc : 9;
++	/*
++	 * A candidate classification that arch-specific implementations
++	 * qualify for correctness.
++	 */
++	unsigned int			ipcc_tmp : 9;
++	/*
++	 * Counter to filter out transient candidate classifications
++	 * of a task.
++	 */
++	unsigned int			ipcc_cntr : 14;
+ #endif
+ 
+ 	/*
+-- 
+2.39.2
+
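The patch above converts ipcc into a 9-bit field and adds two helpers; the three bit-fields (9 + 9 + 14 bits) pack into a single 32-bit word. A quick standalone check of the packing and value ranges (only the widths are taken from the patch):

#include <stdio.h>

/* Same widths as the task_struct fields above. */
struct ipcc_fields {
	unsigned int ipcc      : 9;	/* committed IPC class */
	unsigned int ipcc_tmp  : 9;	/* candidate class awaiting debounce */
	unsigned int ipcc_cntr : 14;	/* debounce counter */
};

int main(void)
{
	printf("sizeof(struct ipcc_fields) = %zu bytes\n",
	       sizeof(struct ipcc_fields));			/* 4 */
	printf("max ipcc / ipcc_tmp = %u\n", (1u << 9) - 1);	/* 511 */
	printf("max ipcc_cntr       = %u\n", (1u << 14) - 1);	/* 16383 */
	return 0;
}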
+From fd936723a40205d2b47336596468dba9c59a4287 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:11:00 -0800
+Subject: [PATCH] sched/core: Initialize helpers of task classification
+
+Just as tasks start life unclassified, initialize the classification
+auxiliary variables.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ kernel/sched/core.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 0ab39cc055c7..2a942fc3c309 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4426,6 +4426,8 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
+ 	p->se.vruntime			= 0;
+ #ifdef CONFIG_IPC_CLASSES
+ 	p->ipcc				= IPC_CLASS_UNCLASSIFIED;
++	p->ipcc_tmp			= IPC_CLASS_UNCLASSIFIED;
++	p->ipcc_cntr			= 0;
+ #endif
+ 	INIT_LIST_HEAD(&p->se.group_node);
+ 
+-- 
+2.39.2
+
+From b98db691b522d6b2ed0dc1bd17e77165b7531ba9 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:11:01 -0800
+Subject: [PATCH] sched/fair: Introduce sched_smt_siblings_idle()
+
+X86 needs to know the idle state of the SMT siblings of a CPU to improve
+the accuracy of IPCC classification. X86 implements support for IPC classes
+in the thermal HFI driver.
+
+Rename is_core_idle() as sched_smt_siblings_idle() and make it available
+outside the scheduler code.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ include/linux/sched.h |  2 ++
+ kernel/sched/fair.c   | 21 +++++++++++++++------
+ 2 files changed, 17 insertions(+), 6 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 582e14cf3f76..f2adf662eda8 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2440,4 +2440,6 @@ static inline void sched_core_fork(struct task_struct *p) { }
+ 
+ extern void sched_set_stop_task(int cpu, struct task_struct *stop);
+ 
++extern bool sched_smt_siblings_idle(int cpu);
++
+ #endif
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index a9a105092e7c..97c574d5fa57 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1064,7 +1064,14 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+  * Scheduling class queueing methods:
+  */
+ 
+-static inline bool is_core_idle(int cpu)
++/**
++ * sched_smt_siblings_idle - Check whether SMT siblings of a CPU are idle
++ * @cpu:	The CPU to check
++ *
++ * Returns true if all the SMT siblings of @cpu are idle or @cpu does not have
++ * SMT siblings. The idle state of @cpu is not considered.
++ */
++bool sched_smt_siblings_idle(int cpu)
+ {
+ #ifdef CONFIG_SCHED_SMT
+ 	int sibling;
+@@ -1767,7 +1774,7 @@ static inline int numa_idle_core(int idle_core, int cpu)
+ 	 * Prefer cores instead of packing HT siblings
+ 	 * and triggering future load balancing.
+ 	 */
+-	if (is_core_idle(cpu))
++	if (sched_smt_siblings_idle(cpu))
+ 		idle_core = cpu;
+ 
+ 	return idle_core;
+@@ -9388,7 +9395,8 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds,  struct sg_lb_stats *sgs
+ 	 * If the destination CPU has SMT siblings, env->idle != CPU_NOT_IDLE
+ 	 * is not sufficient. We need to make sure the whole core is idle.
+ 	 */
+-	if (sds->local->flags & SD_SHARE_CPUCAPACITY && !is_core_idle(env->dst_cpu))
++	if (sds->local->flags & SD_SHARE_CPUCAPACITY &&
++	    !sched_smt_siblings_idle(env->dst_cpu))
+ 		return false;
+ 
+ 	/* Only do SMT checks if either local or candidate have SMT siblings. */
+@@ -10557,7 +10565,8 @@ static struct rq *find_busiest_queue(struct lb_env *env,
+ 		    sched_asym_prefer(i, env->dst_cpu) &&
+ 		    nr_running == 1) {
+ 			if (env->sd->flags & SD_SHARE_CPUCAPACITY ||
+-			    (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && is_core_idle(i)))
++			    (!(env->sd->flags & SD_SHARE_CPUCAPACITY) &&
++			     sched_smt_siblings_idle(i)))
+ 				continue;
+ 		}
+ 
+@@ -10686,7 +10695,7 @@ asym_active_balance(struct lb_env *env)
+ 		 * busy sibling.
+ 		 */
+ 		return sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
+-		       !is_core_idle(env->src_cpu);
++		       !sched_smt_siblings_idle(env->src_cpu);
+ 	}
+ 
+ 	return false;
+@@ -11433,7 +11442,7 @@ static void nohz_balancer_kick(struct rq *rq)
+ 				 */
+ 				if (sd->flags & SD_SHARE_CPUCAPACITY ||
+ 				    (!(sd->flags & SD_SHARE_CPUCAPACITY) &&
+-				     is_core_idle(i))) {
++				     sched_smt_siblings_idle(i))) {
+ 					flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
+ 					goto unlock;
+ 				}
+-- 
+2.39.2
+
+From 7acc78f51465e7ea2b876136a1d99632f3f4ec46 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:11:02 -0800
+Subject: [PATCH] thermal: intel: hfi: Implement model-specific checks for task
+ classification
+
+In Alder Lake and Raptor Lake, the result of thread classification is more
+accurate when only one SMT sibling is busy. Classification results for
+classes 2 and 3 are always reliable.
+
+To avoid unnecessary migrations, only update the class of a task if it has
+been the same for 4 consecutive user ticks.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ drivers/thermal/intel/intel_hfi.c | 60 ++++++++++++++++++++++++++++++-
+ 1 file changed, 59 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
+index 35d947f47550..fdb53e4cabc1 100644
+--- a/drivers/thermal/intel/intel_hfi.c
++++ b/drivers/thermal/intel/intel_hfi.c
+@@ -40,6 +40,7 @@
+ #include <linux/workqueue.h>
+ 
+ #include <asm/msr.h>
++#include <asm/intel-family.h>
+ 
+ #include "../thermal_core.h"
+ #include "intel_hfi.h"
+@@ -209,9 +210,64 @@ static int __percpu *hfi_ipcc_scores;
+  */
+ #define HFI_UNCLASSIFIED_DEFAULT 1
+ 
++#define CLASS_DEBOUNCER_SKIPS 4
++
++/**
++ * debounce_and_update_class() - Process and update a task's classification
++ *
++ * @p:		The task of which the classification will be updated
++ * @new_ipcc:	The new IPC classification
++ *
++ * Update the classification of @p with the new value that hardware provides.
++ * Only update the classification of @p if it has been the same during
++ * CLASS_DEBOUNCER_SKIPS consecutive ticks.
++ */
++static void debounce_and_update_class(struct task_struct *p, u8 new_ipcc)
++{
++	u16 debounce_skip;
++
++	/* The class of @p changed. Only restart the debounce counter. */
++	if (p->ipcc_tmp != new_ipcc) {
++		p->ipcc_cntr = 1;
++		goto out;
++	}
++
++	/*
++	 * The class of @p did not change. Update it if it has been the same
++	 * for CLASS_DEBOUNCER_SKIPS user ticks.
++	 */
++	debounce_skip = p->ipcc_cntr + 1;
++	if (debounce_skip < CLASS_DEBOUNCER_SKIPS)
++		p->ipcc_cntr++;
++	else
++		p->ipcc = new_ipcc;
++
++out:
++	p->ipcc_tmp = new_ipcc;
++}
++
++static bool classification_is_accurate(u8 hfi_class, bool smt_siblings_idle)
++{
++	switch (boot_cpu_data.x86_model) {
++	case INTEL_FAM6_ALDERLAKE:
++	case INTEL_FAM6_ALDERLAKE_L:
++	case INTEL_FAM6_RAPTORLAKE:
++	case INTEL_FAM6_RAPTORLAKE_P:
++	case INTEL_FAM6_RAPTORLAKE_S:
++		if (hfi_class == 3 || hfi_class == 2 || smt_siblings_idle)
++			return true;
++
++		return false;
++
++	default:
++		return true;
++	}
++}
++
+ void intel_hfi_update_ipcc(struct task_struct *curr)
+ {
+ 	union hfi_thread_feedback_char_msr msr;
++	bool idle;
+ 
+ 	/* We should not be here if ITD is not supported. */
+ 	if (!cpu_feature_enabled(X86_FEATURE_ITD)) {
+@@ -227,7 +283,9 @@ void intel_hfi_update_ipcc(struct task_struct *curr)
+ 	 * 0 is a valid classification for Intel Thread Director. A scheduler
+ 	 * IPCC class of 0 means that the task is unclassified. Adjust.
+ 	 */
+-	curr->ipcc = msr.split.classid + 1;
++	idle = sched_smt_siblings_idle(task_cpu(curr));
++	if (classification_is_accurate(msr.split.classid, idle))
++		debounce_and_update_class(curr, msr.split.classid + 1);
+ }
+ 
+ unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
+-- 
+2.39.2
+
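debounce_and_update_class() above only commits a new class once hardware has reported it for CLASS_DEBOUNCER_SKIPS consecutive ticks. The toy trace below replays the same logic on an invented sequence of classifications to show when the committed class actually flips:

#include <stdio.h>

#define CLASS_DEBOUNCER_SKIPS 4

struct task_model {
	unsigned short ipcc;		/* committed class */
	unsigned short ipcc_tmp;	/* last candidate class */
	unsigned short ipcc_cntr;	/* consecutive sightings of ipcc_tmp */
};

/* Same control flow as debounce_and_update_class() in the patch. */
static void debounce(struct task_model *p, unsigned char new_ipcc)
{
	if (p->ipcc_tmp != new_ipcc)
		p->ipcc_cntr = 1;			/* class changed: restart */
	else if (p->ipcc_cntr + 1 < CLASS_DEBOUNCER_SKIPS)
		p->ipcc_cntr++;				/* same class: keep counting */
	else
		p->ipcc = new_ipcc;			/* stable long enough: commit */

	p->ipcc_tmp = new_ipcc;
}

int main(void)
{
	struct task_model p = { .ipcc = 1, .ipcc_tmp = 1, .ipcc_cntr = 0 };
	unsigned char hw[] = { 3, 3, 2, 3, 3, 3, 3 };	/* invented classifications */

	for (unsigned int i = 0; i < sizeof(hw); i++) {
		debounce(&p, hw[i]);
		printf("tick %u: hw=%d committed=%d\n", i, hw[i], p.ipcc);
	}
	/* The committed class only becomes 3 on the 4th consecutive sighting. */
	return 0;
}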
+From a7d1ce079429314c7c2c287a0de5930a90134bb4 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:11:03 -0800
+Subject: [PATCH] x86/cpufeatures: Add feature bit for HRESET
+
+The HRESET instruction prevents the classification of the current task
+from influencing the classification of the next task when running serially
+on the same logical processor.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ arch/x86/include/asm/cpufeatures.h | 1 +
+ arch/x86/include/asm/msr-index.h   | 4 +++-
+ arch/x86/kernel/cpu/scattered.c    | 1 +
+ 3 files changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index a2f2730737ae..0a64e6bc67b1 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -307,6 +307,7 @@
+ #define X86_FEATURE_SGX_EDECCSSA	(11*32+18) /* "" SGX EDECCSSA user leaf function */
+ #define X86_FEATURE_CALL_DEPTH		(11*32+19) /* "" Call depth tracking for RSB stuffing */
+ #define X86_FEATURE_MSR_TSX_CTRL	(11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
++#define X86_FEATURE_HRESET		(11*32+23) /* Hardware history reset instruction */
+ 
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index d83437d3473d..ce8b78d77588 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -1098,6 +1098,9 @@
+ #define MSR_IA32_HW_FEEDBACK_THREAD_CONFIG 0x17d4
+ #define MSR_IA32_HW_FEEDBACK_CHAR	0x17d2
+ 
++/* Hardware History Reset  */
++#define MSR_IA32_HW_HRESET_ENABLE	0x17da
++
+ /* x2APIC locked status */
+ #define MSR_IA32_XAPIC_DISABLE_STATUS	0xBD
+ #define LEGACY_XAPIC_DISABLED		BIT(0) /*
+@@ -1105,5 +1108,4 @@
+ 						* disabling x2APIC will cause
+ 						* a #GP
+ 						*/
+-
+ #endif /* _ASM_X86_MSR_INDEX_H */
+diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
+index f53944fb8f7f..66bc5713644d 100644
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
+ 	{ X86_FEATURE_EPB,		CPUID_ECX,  3, 0x00000006, 0 },
+ 	{ X86_FEATURE_INTEL_PPIN,	CPUID_EBX,  0, 0x00000007, 1 },
+ 	{ X86_FEATURE_RRSBA_CTRL,	CPUID_EDX,  2, 0x00000007, 2 },
++	{ X86_FEATURE_HRESET,		CPUID_EAX, 22, 0x00000007, 1 },
+ 	{ X86_FEATURE_CQM_LLC,		CPUID_EDX,  1, 0x0000000f, 0 },
+ 	{ X86_FEATURE_CQM_OCCUP_LLC,	CPUID_EDX,  0, 0x0000000f, 1 },
+ 	{ X86_FEATURE_CQM_MBM_TOTAL,	CPUID_EDX,  1, 0x0000000f, 1 },
+-- 
+2.39.2
+
+From 8ee8e3c510cb4a284738d65df270e9d8ddbfc67f Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:11:04 -0800
+Subject: [PATCH] x86/hreset: Configure history reset
+
+Configure the MSR that controls the behavior of HRESET on each logical
+processor.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ arch/x86/kernel/cpu/common.c | 23 ++++++++++++++++++++++-
+ 1 file changed, 22 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index f3cc7699e1e1..a2de5736099e 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -412,6 +412,26 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
+ 	cr4_clear_bits(X86_CR4_UMIP);
+ }
+ 
++static u32 hardware_history_features __ro_after_init;
++
++static __always_inline void setup_hreset(struct cpuinfo_x86 *c)
++{
++	if (!cpu_feature_enabled(X86_FEATURE_HRESET))
++		return;
++
++	/*
++	 * Use on all CPUs the hardware history features that the boot
++	 * CPU supports.
++	 */
++	if (c == &boot_cpu_data)
++		hardware_history_features = cpuid_ebx(0x20);
++
++	if (!hardware_history_features)
++		return;
++
++	wrmsrl(MSR_IA32_HW_HRESET_ENABLE, hardware_history_features);
++}
++
+ /* These bits should not change their value after CPU init is finished. */
+ static const unsigned long cr4_pinned_mask =
+ 	X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
+@@ -1849,10 +1869,11 @@ static void identify_cpu(struct cpuinfo_x86 *c)
+ 	/* Disable the PN if appropriate */
+ 	squash_the_stupid_serial_number(c);
+ 
+-	/* Set up SMEP/SMAP/UMIP */
++	/* Set up SMEP/SMAP/UMIP/HRESET */
+ 	setup_smep(c);
+ 	setup_smap(c);
+ 	setup_umip(c);
++	setup_hreset(c);
+ 
+ 	/* Enable FSGSBASE instructions if available. */
+ 	if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
+-- 
+2.39.2
+
+From 4a25b2ad89edfc72bf3f3d1b2cc96916a229ac60 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Mon, 6 Feb 2023 21:11:05 -0800
+Subject: [PATCH] x86/process: Reset hardware history in context switch
+
+Reset the classification history of the current task when switching to the
+next task. Hardware will start the classification of the next task from
+scratch.
+
+Cc: Ben Segall <bsegall@google.com>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ionela Voinescu <ionela.voinescu@arm.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: Len Brown <len.brown@intel.com>
+Cc: Lukasz Luba <lukasz.luba@arm.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Tim C. Chen <tim.c.chen@intel.com>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: x86@kernel.org
+Cc: linux-pm@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Patchset: intel-thread-director
+---
+ arch/x86/include/asm/hreset.h | 30 ++++++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/common.c  |  7 +++++++
+ arch/x86/kernel/process_32.c  |  3 +++
+ arch/x86/kernel/process_64.c  |  3 +++
+ 4 files changed, 43 insertions(+)
+ create mode 100644 arch/x86/include/asm/hreset.h
+
+diff --git a/arch/x86/include/asm/hreset.h b/arch/x86/include/asm/hreset.h
+new file mode 100644
+index 000000000000..d68ca2fb8642
+--- /dev/null
++++ b/arch/x86/include/asm/hreset.h
+@@ -0,0 +1,30 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _ASM_X86_HRESET_H
++
++/**
++ * HRESET - History reset. Available since binutils v2.36.
++ *
++ * Request the processor to reset the history of task classification on the
++ * current logical processor. The history components to be
++ * reset are specified in %eax. Only bits specified in CPUID(0x20).EBX
++ * and enabled in the IA32_HRESET_ENABLE MSR can be selected.
++ *
++ * The assembly code looks like:
++ *
++ *	hreset %eax
++ *
++ * The corresponding machine code looks like:
++ *
++ *	F3 0F 3A F0 ModRM Imm
++ *
++ * The value of ModRM is 0xc0 to specify %eax register addressing.
++ * The ignored immediate operand is set to 0.
++ *
++ * The instruction is documented in the Intel SDM.
++ */
++
++#define __ASM_HRESET  ".byte 0xf3, 0xf, 0x3a, 0xf0, 0xc0, 0x0"
++
++void reset_hardware_history(void);
++
++#endif /* _ASM_X86_HRESET_H */
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index a2de5736099e..2aaf2320b149 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -53,6 +53,7 @@
+ #include <asm/mce.h>
+ #include <asm/msr.h>
+ #include <asm/cacheinfo.h>
++#include <asm/hreset.h>
+ #include <asm/memtype.h>
+ #include <asm/microcode.h>
+ #include <asm/microcode_intel.h>
+@@ -414,6 +415,12 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
+ 
+ static u32 hardware_history_features __ro_after_init;
+ 
++void reset_hardware_history(void)
++{
++	asm_inline volatile (ALTERNATIVE("", __ASM_HRESET, X86_FEATURE_HRESET)
++			     : : "a" (hardware_history_features) : "memory");
++}
++
+ static __always_inline void setup_hreset(struct cpuinfo_x86 *c)
+ {
+ 	if (!cpu_feature_enabled(X86_FEATURE_HRESET))
+diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
+index 470c128759ea..397a6e6f4e61 100644
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -52,6 +52,7 @@
+ #include <asm/switch_to.h>
+ #include <asm/vm86.h>
+ #include <asm/resctrl.h>
++#include <asm/hreset.h>
+ #include <asm/proto.h>
+ 
+ #include "process.h"
+@@ -214,6 +215,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ 	/* Load the Intel cache allocation PQR MSR. */
+ 	resctrl_sched_in();
+ 
++	reset_hardware_history();
++
+ 	return prev_p;
+ }
+ 
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index 4e34b3b68ebd..6176044ecc16 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -53,6 +53,7 @@
+ #include <asm/xen/hypervisor.h>
+ #include <asm/vdso.h>
+ #include <asm/resctrl.h>
++#include <asm/hreset.h>
+ #include <asm/unistd.h>
+ #include <asm/fsgsbase.h>
+ #ifdef CONFIG_IA32_EMULATION
+@@ -658,6 +659,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ 	/* Load the Intel cache allocation PQR MSR. */
+ 	resctrl_sched_in();
+ 
++	reset_hardware_history();
++
+ 	return prev_p;
+ }
+ 
+-- 
+2.39.2
+
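For reference, the opcode bytes hard-coded in __ASM_HRESET above encode `hreset %eax` exactly as the header comment describes (F3 0F 3A F0, ModRM 0xC0 for %eax, ignored imm8 0). A trivial sketch that only prints the byte sequence the ALTERNATIVE patches in; it does not execute the instruction:

#include <stdio.h>

int main(void)
{
	/* Byte-for-byte the string behind __ASM_HRESET in the patch. */
	unsigned char hreset_eax[] = { 0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00 };

	for (unsigned int i = 0; i < sizeof(hreset_eax); i++)
		printf("%02x ", hreset_eax[i]);
	printf("\n");
	return 0;
}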