0015-intel-thread-director.patch 111 KB

From 0289e120424c88695e731293bb4f2816bc1d8da6 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 20:58:29 -0800
Subject: [PATCH] sched/fair: Generalize asym_packing logic for SMT cores
When doing asym_packing load balancing between cores, all we care about is that
the destination core is fully idle (including SMT siblings, if any) and
that the busiest candidate scheduling group has exactly one busy CPU. It is
irrelevant whether the candidate busiest core is non-SMT, SMT2, SMT4, SMT8,
etc.
Do not handle the candidate busiest non-SMT vs SMT cases separately. Simply
do the two checks described above. Let find_busiest_group() handle bigger
imbalances in the number of idle CPUs.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Patchset: intel-thread-director
---
kernel/sched/fair.c | 41 ++++++++++++++---------------------------
1 file changed, 14 insertions(+), 27 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0f87369914274..4509086a60a0d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9124,13 +9124,11 @@ group_type group_classify(unsigned int imbalance_pct,
* the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
* only if @dst_cpu has higher priority.
*
- * If both @dst_cpu and @sg have SMT siblings, and @sg has exactly one more
- * busy CPU than @sds::local, let @dst_cpu pull tasks if it has higher priority.
- * Bigger imbalances in the number of busy CPUs will be dealt with in
- * update_sd_pick_busiest().
- *
- * If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings
- * of @dst_cpu are idle and @sg has lower priority.
+ * If @dst_cpu has SMT siblings, check if there are no running tasks in
+ * @sds::local. In such case, decide based on the priority of @sg. Do it only
+ * if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
+ * imbalances in the number of busy CPUs will be dealt with in
+ * find_busiest_group().
*
* Return: true if @dst_cpu can pull tasks, false otherwise.
*/
@@ -9139,12 +9137,10 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
struct sched_group *sg)
{
#ifdef CONFIG_SCHED_SMT
- bool local_is_smt, sg_is_smt;
+ bool local_is_smt;
int sg_busy_cpus;
local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
- sg_is_smt = sg->flags & SD_SHARE_CPUCAPACITY;
-
sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
if (!local_is_smt) {
@@ -9165,25 +9161,16 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
}
- /* @dst_cpu has SMT siblings. */
-
- if (sg_is_smt) {
- int local_busy_cpus = sds->local->group_weight -
- sds->local_stat.idle_cpus;
- int busy_cpus_delta = sg_busy_cpus - local_busy_cpus;
-
- if (busy_cpus_delta == 1)
- return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
-
- return false;
- }
-
/*
- * @sg does not have SMT siblings. Ensure that @sds::local does not end
- * up with more than one busy SMT sibling and only pull tasks if there
- * are not busy CPUs (i.e., no CPU has running tasks).
+ * @dst_cpu has SMT siblings. Do asym_packing load balancing only if
+ * all its siblings are idle (moving tasks between physical cores in
+ * which some SMT siblings are busy results in the same throughput).
+ *
+ * If the difference in the number of busy CPUs is two or more, let
+ * find_busiest_group() take care of it. We only care if @sg has
+ * exactly one busy CPU. This covers SMT and non-SMT sched groups.
*/
- if (!sds->local_stat.sum_nr_running)
+ if (sg_busy_cpus == 1 && !sds->local_stat.sum_nr_running)
return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
return false;
--
2.39.2
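
The check that survives this patch is easy to model outside the kernel. Below is a minimal userspace sketch (not part of the patch; the struct names and priority values are illustrative) of the simplified condition: pull only when the candidate group has exactly one busy CPU and the local group has no running tasks, regardless of the SMT width of either side.

#include <stdbool.h>
#include <stdio.h>

/* Simplified model of the sg_lb_stats fields used by the check. */
struct sg_stats {
    unsigned int group_weight;   /* CPUs in the group */
    unsigned int idle_cpus;      /* idle CPUs in the group */
    unsigned int sum_nr_running; /* running tasks in the group */
};

/* Hypothetical stand-in for sched_asym_prefer(dst_cpu, asym_prefer_cpu). */
static bool prefer_dst(int dst_prio, int sg_prio)
{
    return dst_prio > sg_prio;
}

/* Mirrors the simplified tail of asym_smt_can_pull_tasks(). */
static bool can_pull(const struct sg_stats *local, const struct sg_stats *sg,
                     int dst_prio, int sg_prio)
{
    unsigned int sg_busy_cpus = sg->group_weight - sg->idle_cpus;

    if (sg_busy_cpus == 1 && !local->sum_nr_running)
        return prefer_dst(dst_prio, sg_prio);

    return false;
}

int main(void)
{
    struct sg_stats local = { 2, 2, 0 }; /* fully idle SMT2 core */
    struct sg_stats smt8  = { 8, 7, 1 }; /* SMT8 core with one busy CPU */

    /* Non-SMT, SMT2, SMT4, SMT8... all go through the same two checks. */
    printf("pull: %d\n", can_pull(&local, &smt8, 2, 1));
    return 0;
}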
From e9da5836d3052648536258be7fbaec9f2f15862e Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 20:58:30 -0800
Subject: [PATCH] sched/fair: Move is_core_idle() out of CONFIG_NUMA
asym_packing needs this function to determine whether an SMT core is a
suitable destination for load balancing.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Patchset: intel-thread-director
---
kernel/sched/fair.c | 34 +++++++++++++++++-----------------
1 file changed, 17 insertions(+), 17 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4509086a60a0d..d58df9c6a88c4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1064,6 +1064,23 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
* Scheduling class queueing methods:
*/
+static inline bool is_core_idle(int cpu)
+{
+#ifdef CONFIG_SCHED_SMT
+ int sibling;
+
+ for_each_cpu(sibling, cpu_smt_mask(cpu)) {
+ if (cpu == sibling)
+ continue;
+
+ if (!idle_cpu(sibling))
+ return false;
+ }
+#endif
+
+ return true;
+}
+
#ifdef CONFIG_NUMA
#define NUMA_IMBALANCE_MIN 2
@@ -1700,23 +1717,6 @@ struct numa_stats {
int idle_cpu;
};
-static inline bool is_core_idle(int cpu)
-{
-#ifdef CONFIG_SCHED_SMT
- int sibling;
-
- for_each_cpu(sibling, cpu_smt_mask(cpu)) {
- if (cpu == sibling)
- continue;
-
- if (!idle_cpu(sibling))
- return false;
- }
-#endif
-
- return true;
-}
-
struct task_numa_env {
struct task_struct *p;
--
2.39.2
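
For reference, what is_core_idle() computes can be modeled in userspace with the SMT mask and per-CPU idle states replaced by plain arrays. A sketch assuming two-way SMT; none of the names below are kernel API:

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

/* Per-CPU idle state; CPU 0 is busy, the rest are idle. */
static const bool cpu_is_idle[NR_CPUS] = { false, true, true, true };
/* Stand-in for cpu_smt_mask(): CPUs 0-1 and 2-3 are sibling pairs. */
static const int smt_sibling[NR_CPUS] = { 1, 0, 3, 2 };

static bool core_is_idle(int cpu)
{
    int sibling = smt_sibling[cpu];

    /* The kernel iterates over cpu_smt_mask(cpu), skipping @cpu itself;
     * with two-way SMT there is a single sibling to inspect. */
    if (sibling != cpu && !cpu_is_idle[sibling])
        return false;

    return true;
}

int main(void)
{
    printf("core of CPU1 idle: %d\n", core_is_idle(1)); /* 0: sibling CPU0 busy */
    printf("core of CPU2 idle: %d\n", core_is_idle(2)); /* 1: sibling CPU3 idle */
    return 0;
}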
From e0ad77720e1ed2dc413aa9229442e8df0ee0f6ac Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 20:58:31 -0800
Subject: [PATCH] sched/fair: Only do asym_packing load balancing from fully
idle SMT cores
When balancing load between cores, all the SMT siblings of the destination
CPU, if any, must be idle. Otherwise, pulling new tasks degrades the
throughput of the busy SMT siblings. The overall throughput of the system
remains the same.
When balancing load within an SMT core this consideration is not relevant.
Follow the priorities that hardware indicates.
Using is_core_idle() renders checking !sds->local_stat.sum_nr_running
redundant. Remove it.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Suggested-by: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Patchset: intel-thread-director
---
kernel/sched/fair.c | 34 +++++++++++++++++++++++++---------
1 file changed, 25 insertions(+), 9 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d58df9c6a88c4..1b134a2f0585b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9120,12 +9120,14 @@ group_type group_classify(unsigned int imbalance_pct,
* Check the state of the SMT siblings of both @sds::local and @sg and decide
* if @dst_cpu can pull tasks.
*
+ * This function must be called only if all the SMT siblings of @dst_cpu are
+ * idle, if any.
+ *
* If @dst_cpu does not have SMT siblings, it can pull tasks if two or more of
* the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
* only if @dst_cpu has higher priority.
*
- * If @dst_cpu has SMT siblings, check if there are no running tasks in
- * @sds::local. In such case, decide based on the priority of @sg. Do it only
+ * If @dst_cpu has SMT siblings, decide based on the priority of @sg. Do it only
* if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
* imbalances in the number of busy CPUs will be dealt with in
* find_busiest_group().
@@ -9162,15 +9164,13 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
}
/*
- * @dst_cpu has SMT siblings. Do asym_packing load balancing only if
- * all its siblings are idle (moving tasks between physical cores in
- * which some SMT siblings are busy results in the same throughput).
+ * @dst_cpu has SMT siblings and are also idle.
*
* If the difference in the number of busy CPUs is two or more, let
* find_busiest_group() take care of it. We only care if @sg has
* exactly one busy CPU. This covers SMT and non-SMT sched groups.
*/
- if (sg_busy_cpus == 1 && !sds->local_stat.sum_nr_running)
+ if (sg_busy_cpus == 1)
return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
return false;
@@ -9184,7 +9184,14 @@ static inline bool
sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs,
struct sched_group *group)
{
- /* Only do SMT checks if either local or candidate have SMT siblings */
+ /*
+ * If the destination CPU has SMT siblings, env->idle != CPU_NOT_IDLE
+ * is not sufficient. We need to make sure the whole core is idle.
+ */
+ if (sds->local->flags & SD_SHARE_CPUCAPACITY && !is_core_idle(env->dst_cpu))
+ return false;
+
+ /* Only do SMT checks if either local or candidate have SMT siblings. */
if ((sds->local->flags & SD_SHARE_CPUCAPACITY) ||
(group->flags & SD_SHARE_CPUCAPACITY))
return asym_smt_can_pull_tasks(env->dst_cpu, sds, sgs, group);
@@ -11131,8 +11138,17 @@ static void nohz_balancer_kick(struct rq *rq)
*/
for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
if (sched_asym_prefer(i, cpu)) {
- flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
- goto unlock;
+ /*
+ * Always do ASYM_PACKING balance in the SMT
+ * domain. In upper domains, the core must be
+ * fully idle.
+ */
+ if (sd->flags & SD_SHARE_CPUCAPACITY ||
+ (!(sd->flags & SD_SHARE_CPUCAPACITY) &&
+ is_core_idle(i))) {
+ flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
+ goto unlock;
+ }
}
}
}
--
2.39.2
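
Note that the new nohz kick condition has the shape A || (!A && B), which reduces to A || B. A tiny userspace sketch of the condensed predicate (the flag value is made up for illustration):

#include <stdbool.h>
#include <stdio.h>

#define SD_SHARE_CPUCAPACITY 0x1 /* illustrative value, not the kernel's */

/* Stand-in for is_core_idle(i) at the candidate CPU. */
static bool core_idle;

/* The hunk above, condensed: kick in the SMT domain unconditionally,
 * in upper domains only if the candidate core is fully idle. */
static bool should_kick(unsigned int sd_flags)
{
    return (sd_flags & SD_SHARE_CPUCAPACITY) || core_idle;
}

int main(void)
{
    core_idle = false;
    printf("SMT domain, busy core: kick=%d\n",
           should_kick(SD_SHARE_CPUCAPACITY)); /* 1 */
    printf("MC domain, busy core:  kick=%d\n",
           should_kick(0));                    /* 0 */
    return 0;
}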
From 6894e2e70bb2dfe0a96d65a70c1e9a4005528211 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 20:58:32 -0800
Subject: [PATCH] sched/fair: Let low-priority cores help high-priority busy
SMT cores
Using asym_packing priorities within an SMT core is straightforward. Just
follow the priorities that hardware indicates.
When balancing load from an SMT core, also consider the idle state of its
siblings. Priorities do not reflect that an SMT core divides its throughput
among all its busy siblings. They only make sense when exactly one sibling
is busy.
Indicate that active balance is needed if the destination CPU has lower
priority than the source CPU but the latter has busy SMT siblings.
Make find_busiest_queue() not skip higher-priority SMT cores with more than
one busy sibling.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Suggested-by: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Patchset: intel-thread-director
---
kernel/sched/fair.c | 31 ++++++++++++++++++++++++++-----
1 file changed, 26 insertions(+), 5 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1b134a2f0585b..1255d99877fea 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10306,11 +10306,20 @@ static struct rq *find_busiest_queue(struct lb_env *env,
nr_running == 1)
continue;
- /* Make sure we only pull tasks from a CPU of lower priority */
+ /*
+ * Make sure we only pull tasks from a CPU of lower priority
+ * when balancing between SMT siblings.
+ *
+ * If balancing between cores, let lower priority CPUs help
+ * SMT cores with more than one busy sibling.
+ */
if ((env->sd->flags & SD_ASYM_PACKING) &&
sched_asym_prefer(i, env->dst_cpu) &&
- nr_running == 1)
- continue;
+ nr_running == 1) {
+ if (env->sd->flags & SD_SHARE_CPUCAPACITY ||
+ (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && is_core_idle(i)))
+ continue;
+ }
switch (env->migration_type) {
case migrate_load:
@@ -10400,8 +10409,20 @@ asym_active_balance(struct lb_env *env)
* lower priority CPUs in order to pack all tasks in the
* highest priority CPUs.
*/
- return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
- sched_asym_prefer(env->dst_cpu, env->src_cpu);
+ if (env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING)) {
+ /* Always obey priorities between SMT siblings. */
+ if (env->sd->flags & SD_SHARE_CPUCAPACITY)
+ return sched_asym_prefer(env->dst_cpu, env->src_cpu);
+
+ /*
+ * A lower priority CPU can help an SMT core with more than one
+ * busy sibling.
+ */
+ return sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
+ !is_core_idle(env->src_cpu);
+ }
+
+ return false;
}
static inline bool
--
2.39.2
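
The rewritten asym_active_balance() decision can be tabulated with a small userspace model. Inputs are reduced to booleans; the names are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdio.h>

/* Model of the rewritten asym_active_balance(): @dst_idle stands for
 * env->idle != CPU_NOT_IDLE together with SD_ASYM_PACKING, @smt_domain
 * for SD_SHARE_CPUCAPACITY, @dst_prefers for sched_asym_prefer(dst, src). */
static bool asym_active_balance(bool dst_idle, bool smt_domain,
                                bool dst_prefers, bool src_core_idle)
{
    if (!dst_idle)
        return false;

    /* Always obey priorities between SMT siblings. */
    if (smt_domain)
        return dst_prefers;

    /* A lower priority CPU can help an SMT core with more than one
     * busy sibling. */
    return dst_prefers || !src_core_idle;
}

int main(void)
{
    /* A low-priority idle core helping a high-priority core whose
     * siblings are busy: dst does not "prefer", yet balance happens. */
    printf("%d\n", asym_active_balance(true, false, false, false)); /* 1 */
    /* The same case within an SMT core: priorities alone decide. */
    printf("%d\n", asym_active_balance(true, true, false, false));  /* 0 */
    return 0;
}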
From aacb4416f1e6e04c9ef67e06855b7a4c26d33e3d Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 20:58:33 -0800
Subject: [PATCH] sched/fair: Keep a fully_busy SMT sched group as busiest
When comparing two fully_busy scheduling groups, keep the current busiest
group if it represents an SMT core. Tasks in such a scheduling group share
CPU resources and need more help than tasks in a non-SMT fully_busy group.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Patchset: intel-thread-director
---
kernel/sched/fair.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1255d99877fea..ed1f13fa32f86 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9384,10 +9384,22 @@ static bool update_sd_pick_busiest(struct lb_env *env,
* contention when accessing shared HW resources.
*
* XXX for now avg_load is not computed and always 0 so we
- * select the 1st one.
+ * select the 1st one, except if @sg is composed of SMT
+ * siblings.
*/
- if (sgs->avg_load <= busiest->avg_load)
+
+ if (sgs->avg_load < busiest->avg_load)
return false;
+
+ if (sgs->avg_load == busiest->avg_load) {
+ /*
+ * SMT sched groups need more help than non-SMT groups.
+ * If @sg happens to also be SMT, either choice is good.
+ */
+ if (sds->busiest->flags & SD_SHARE_CPUCAPACITY)
+ return false;
+ }
+
break;
case group_has_spare:
--
2.39.2
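
The tie-break introduced here is small enough to model directly. A sketch of the comparison (the avg_load values and the SMT flag are illustrative inputs, not kernel data):

#include <stdbool.h>
#include <stdio.h>

/* Does @sg replace the current busiest group? Mirrors the rewritten
 * fully_busy branch of update_sd_pick_busiest(). */
static bool sg_replaces_busiest(unsigned long sg_load,
                                unsigned long busiest_load,
                                bool busiest_is_smt)
{
    if (sg_load < busiest_load)
        return false;

    /* On a tie, keep an SMT busiest: its tasks share core resources
     * and need more help than those of a non-SMT group. */
    if (sg_load == busiest_load && busiest_is_smt)
        return false;

    return true;
}

int main(void)
{
    /* avg_load is currently always 0 for fully_busy groups, so the
     * tie-break is what actually decides. */
    printf("%d\n", sg_replaces_busiest(0, 0, true));  /* 0: keep SMT group */
    printf("%d\n", sg_replaces_busiest(0, 0, false)); /* 1: take @sg */
    return 0;
}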
From 6a40621091eafca8bc7d4ac2f178971046744a58 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 20:58:34 -0800
Subject: [PATCH] sched/fair: Use the prefer_sibling flag of the current sched
domain
SD_PREFER_SIBLING is set from the SMT scheduling domain up to the first
non-NUMA domain (the exception is systems with SD_ASYM_CPUCAPACITY).
Above the SMT sched domain, all domains have a child. The SD_PREFER_SIBLING
flag is always honored regardless of the scheduling domain at which the
load balance takes place.
There are cases, however, in which the busiest CPU's sched domain has a
child but the destination CPU's does not. Consider, for instance, a non-SMT
core (or an SMT core with only one online sibling) doing load balance with
an SMT core at the MC level. SD_PREFER_SIBLING will not be honored. We are
left with a fully busy SMT core and an idle non-SMT core.
Avoid inconsistent behavior. Use the prefer_sibling behavior at the current
scheduling domain, not its child.
The NUMA sched domain does not have the SD_PREFER_SIBLING flag. Thus, we
will not spread load among NUMA sched groups, as desired.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Suggested-by: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Patchset: intel-thread-director
---
kernel/sched/fair.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ed1f13fa32f86..9d94ba3f67269 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9874,7 +9874,6 @@ static void update_idle_cpu_scan(struct lb_env *env,
static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds)
{
- struct sched_domain *child = env->sd->child;
struct sched_group *sg = env->sd->groups;
struct sg_lb_stats *local = &sds->local_stat;
struct sg_lb_stats tmp_sgs;
@@ -9915,9 +9914,11 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
sg = sg->next;
} while (sg != env->sd->groups);
- /* Tag domain that child domain prefers tasks go to siblings first */
- sds->prefer_sibling = child && child->flags & SD_PREFER_SIBLING;
-
+ /*
+ * Tag domain that @env::sd prefers to spread excess tasks among
+ * sibling sched groups.
+ */
+ sds->prefer_sibling = env->sd->flags & SD_PREFER_SIBLING;
if (env->sd->flags & SD_NUMA)
env->fbq_type = fbq_classify_group(&sds->busiest_stat);
@@ -10216,7 +10217,6 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
goto out_balanced;
}
- /* Try to move all excess tasks to child's sibling domain */
if (sds.prefer_sibling && local->group_type == group_has_spare &&
busiest->sum_nr_running > local->sum_nr_running + 1)
goto force_balance;
--
2.39.2
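
The inconsistency the commit message describes is visible in a small userspace model of where the flag is read from (the flag value and struct are illustrative, not kernel definitions):

#include <stdbool.h>
#include <stdio.h>

#define SD_PREFER_SIBLING 0x1 /* illustrative value, not the kernel's */

struct sd { unsigned int flags; struct sd *child; };

/* Old: the flag was read from the child domain (NULL at the lowest level). */
static bool prefer_sibling_old(struct sd *sd)
{
    return sd->child && (sd->child->flags & SD_PREFER_SIBLING);
}

/* New: the flag is read from the domain doing the load balance. */
static bool prefer_sibling_new(struct sd *sd)
{
    return sd->flags & SD_PREFER_SIBLING;
}

int main(void)
{
    /* A non-SMT core balancing at the MC level has no child domain:
     * the old scheme loses the flag, the new one honors it. */
    struct sd mc_nosmt = { SD_PREFER_SIBLING, NULL };

    printf("old: %d new: %d\n", prefer_sibling_old(&mc_nosmt),
           prefer_sibling_new(&mc_nosmt));
    return 0;
}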
From b35c1dc0c7b494d014ffbc6e310506fb8c1b3457 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 20:58:35 -0800
Subject: [PATCH] sched/fair: Do not even the number of busy CPUs via
asym_packing
Now that find_busiest_group() triggers load balancing between a fully_busy
SMT2 core and an idle non-SMT core, it is no longer needed to force
balancing via asym_packing. Use asym_packing only as intended: when there
is a high-priority CPU that is idle.
After this change, the same logic applies to SMT and non-SMT local groups.
Simplify asym_smt_can_pull_tasks() accordingly.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Patchset: intel-thread-director
---
kernel/sched/fair.c | 37 +++++--------------------------------
1 file changed, 5 insertions(+), 32 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9d94ba3f67269..e5079ee882ff8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9117,20 +9117,15 @@ group_type group_classify(unsigned int imbalance_pct,
* @sgs: Load-balancing statistics of the candidate busiest group
* @sg: The candidate busiest group
*
- * Check the state of the SMT siblings of both @sds::local and @sg and decide
- * if @dst_cpu can pull tasks.
+ * Check the state of the SMT siblings of @sg and decide if @dst_cpu can pull
+ * tasks.
*
* This function must be called only if all the SMT siblings of @dst_cpu are
* idle, if any.
*
- * If @dst_cpu does not have SMT siblings, it can pull tasks if two or more of
- * the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
- * only if @dst_cpu has higher priority.
- *
- * If @dst_cpu has SMT siblings, decide based on the priority of @sg. Do it only
- * if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
- * imbalances in the number of busy CPUs will be dealt with in
- * find_busiest_group().
+ * @dst_cpu can pull tasks if @sg has exactly one busy CPU (i.e., one more than
+ * @sds::local) and has lower group priority than @sds::local. Bigger imbalances
+ * in the number of busy CPUs will be dealt with in find_busiest_group().
*
* Return: true if @dst_cpu can pull tasks, false otherwise.
*/
@@ -9139,33 +9134,11 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
struct sched_group *sg)
{
#ifdef CONFIG_SCHED_SMT
- bool local_is_smt;
int sg_busy_cpus;
- local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
- if (!local_is_smt) {
- /*
- * If we are here, @dst_cpu is idle and does not have SMT
- * siblings. Pull tasks if candidate group has two or more
- * busy CPUs.
- */
- if (sg_busy_cpus >= 2) /* implies sg_is_smt */
- return true;
-
- /*
- * @dst_cpu does not have SMT siblings. @sg may have SMT
- * siblings and only one is busy. In such case, @dst_cpu
- * can help if it has higher priority and is idle (i.e.,
- * it has no running tasks).
- */
- return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
- }
-
/*
- * @dst_cpu has SMT siblings and are also idle.
- *
* If the difference in the number of busy CPUs is two or more, let
* find_busiest_group() take care of it. We only care if @sg has
* exactly one busy CPU. This covers SMT and non-SMT sched groups.
--
2.39.2
From a06f6c7fbf4e42b2e8ff963d7b3d963550cc8ea3 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 20:58:36 -0800
Subject: [PATCH] sched/topology: Remove SHARED_CHILD from ASYM_PACKING
Only x86 and Power7 use ASYM_PACKING. They use it differently.
Power7 has cores of equal priority, but the SMT siblings of a core have
different priorities. Parent scheduling domains do not need (nor have) the
ASYM_PACKING flag. SHARED_CHILD is not needed. Using SHARED_PARENT would
cause the topology debug code to complain.
X86 has cores of different priority, but all the SMT siblings of the core
have equal priority. It needs ASYM_PACKING at the MC level, but not at the
SMT level (it also needs it at upper levels if they have scheduling groups
of different priority). Removing ASYM_PACKING from the SMT domain causes
the topology debug code to complain.
Remove SHARED_CHILD for now. We still need a topology check that satisfies
both architectures.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Suggested-by: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Patchset: intel-thread-director
---
include/linux/sched/sd_flags.h | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
index 57bde66d95f7a..800238854ba54 100644
--- a/include/linux/sched/sd_flags.h
+++ b/include/linux/sched/sd_flags.h
@@ -132,12 +132,9 @@ SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
/*
* Place busy tasks earlier in the domain
*
- * SHARED_CHILD: Usually set on the SMT level. Technically could be set further
- * up, but currently assumed to be set from the base domain
- * upwards (see update_top_cache_domain()).
* NEEDS_GROUPS: Load balancing flag.
*/
-SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
+SD_FLAG(SD_ASYM_PACKING, SDF_NEEDS_GROUPS)
/*
* Prefer to place tasks in a sibling domain
--
2.39.2
From ae1ee00a1f2e7ea4ff86ad6f9fbce736960049f3 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 20:58:37 -0800
Subject: [PATCH] x86/sched: Remove SD_ASYM_PACKING from the SMT domain flags
There is no difference between any of the SMT siblings of a physical core.
Do not do asym_packing load balancing at this level.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Patchset: intel-thread-director
---
arch/x86/kernel/smpboot.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 55cad72715d99..0213d066a9a96 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -547,7 +547,7 @@ static int x86_core_flags(void)
#ifdef CONFIG_SCHED_SMT
static int x86_smt_flags(void)
{
- return cpu_smt_flags() | x86_sched_itmt_flags();
+ return cpu_smt_flags();
}
#endif
#ifdef CONFIG_SCHED_CLUSTER
--
2.39.2
From 03868cd4806db1cfd95e78ddaa203000b8aad97f Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 20:58:38 -0800
Subject: [PATCH] x86/sched/itmt: Give all SMT siblings of a core the same
priority
X86 does not have the SD_ASYM_PACKING flag in the SMT domain. The scheduler
knows how to handle SMT and non-SMT cores of different priority. There is
no reason for SMT siblings of a core to have different priorities.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Patchset: intel-thread-director
---
arch/x86/kernel/itmt.c | 23 +++++------------------
1 file changed, 5 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index 9ff480e94511b..6510883c5e817 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -174,32 +174,19 @@ int arch_asym_cpu_priority(int cpu)
/**
* sched_set_itmt_core_prio() - Set CPU priority based on ITMT
- * @prio: Priority of cpu core
- * @core_cpu: The cpu number associated with the core
+ * @prio: Priority of @cpu
+ * @cpu: The CPU number
*
* The pstate driver will find out the max boost frequency
* and call this function to set a priority proportional
- * to the max boost frequency. CPU with higher boost
+ * to the max boost frequency. CPUs with higher boost
* frequency will receive higher priority.
*
* No need to rebuild sched domain after updating
* the CPU priorities. The sched domains have no
* dependency on CPU priorities.
*/
-void sched_set_itmt_core_prio(int prio, int core_cpu)
+void sched_set_itmt_core_prio(int prio, int cpu)
{
- int cpu, i = 1;
-
- for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
- int smt_prio;
-
- /*
- * Ensure that the siblings are moved to the end
- * of the priority chain and only used when
- * all other high priority cpus are out of capacity.
- */
- smt_prio = prio * smp_num_siblings / (i * i);
- per_cpu(sched_core_priority, cpu) = smt_prio;
- i++;
- }
+ per_cpu(sched_core_priority, cpu) = prio;
}
--
2.39.2
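
The numeric effect of dropping the per-sibling scaling is easy to verify. A sketch of the old formula, smt_prio = prio * smp_num_siblings / (i * i), against the new behavior, for a core with base priority 100 and two SMT siblings (values chosen for illustration):

#include <stdio.h>

int main(void)
{
    int prio = 100, smp_num_siblings = 2;

    /* Old scheme: the first sibling is boosted, later siblings are
     * pushed to the end of the priority chain. */
    for (int i = 1; i <= smp_num_siblings; i++)
        printf("old: sibling %d gets prio %d\n", i,
               prio * smp_num_siblings / (i * i)); /* 200, then 50 */

    /* New scheme: every sibling of the core gets the same priority. */
    for (int i = 1; i <= smp_num_siblings; i++)
        printf("new: sibling %d gets prio %d\n", i, prio); /* 100, 100 */

    return 0;
}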
  748. From 10a86fa64a25c1156d1de468366708274cdbf6b8 Mon Sep 17 00:00:00 2001
  749. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  750. Date: Mon, 6 Feb 2023 21:10:42 -0800
  751. Subject: [PATCH] sched/task_struct: Introduce IPC classes of tasks
  752. On hybrid processors, the architecture differences between the types of
  753. CPUs lead to different instructions-per-cycle (IPC) on each type of CPU.
  754. IPCs may differ further by the type of instructions. Instructions can be
  755. grouped into classes of similar IPCs.
  756. Hence, tasks can be classified into groups based on the type of
  757. instructions they execute.
  758. Add a new member task_struct::ipcc to associate a particular task to
  759. an IPC class that depends on the instructions it executes.
  760. The scheduler may use the IPC class of a task and data about the
  761. performance among CPUs of a given IPC class to improve throughput. It
  762. may, for instance, place certain classes of tasks on CPUs of higher
  763. performance.
  764. The methods to determine the classification of a task and its relative
  765. IPC score are specific to each CPU architecture.
  766. Cc: Ben Segall <bsegall@google.com>
  767. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  768. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  769. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  770. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  771. Cc: Len Brown <len.brown@intel.com>
  772. Cc: Lukasz Luba <lukasz.luba@arm.com>
  773. Cc: Mel Gorman <mgorman@suse.de>
  774. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  775. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  776. Cc: Steven Rostedt <rostedt@goodmis.org>
  777. Cc: Tim C. Chen <tim.c.chen@intel.com>
  778. Cc: Valentin Schneider <vschneid@redhat.com>
  779. Cc: x86@kernel.org
  780. Cc: linux-pm@vger.kernel.org
  781. Cc: linux-kernel@vger.kernel.org
  782. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  783. Patchset: intel-thread-director
  784. ---
  785. include/linux/sched.h | 10 ++++++++++
  786. init/Kconfig | 12 ++++++++++++
  787. 2 files changed, 22 insertions(+)
  788. diff --git a/include/linux/sched.h b/include/linux/sched.h
  789. index 853d08f7562bd..f292942178850 100644
  790. --- a/include/linux/sched.h
  791. +++ b/include/linux/sched.h
  792. @@ -127,6 +127,8 @@ struct task_group;
  793. __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
  794. TASK_PARKED)
  795. +#define IPC_CLASS_UNCLASSIFIED 0
  796. +
  797. #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING)
  798. #define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0)
  799. @@ -1522,6 +1524,14 @@ struct task_struct {
  800. union rv_task_monitor rv[RV_PER_TASK_MONITORS];
  801. #endif
  802. +#ifdef CONFIG_IPC_CLASSES
  803. + /*
  804. + * A hardware-defined classification of task that reflects but is
  805. + * not identical to the number of instructions per cycle.
  806. + */
  807. + unsigned short ipcc;
  808. +#endif
  809. +
  810. /*
  811. * New fields for task_struct should be added above here, so that
  812. * they are included in the randomized portion of task_struct.
  813. diff --git a/init/Kconfig b/init/Kconfig
  814. index 44e90b28a30f1..24c5eec9d22e6 100644
  815. --- a/init/Kconfig
  816. +++ b/init/Kconfig
  817. @@ -867,6 +867,18 @@ config UCLAMP_BUCKETS_COUNT
  818. If in doubt, use the default value.
  819. +config IPC_CLASSES
  820. + bool "IPC classes of tasks"
  821. + depends on SMP
  822. + help
  823. + If selected, each task is assigned a classification value that
  824. + reflects the type of instructions that the task executes. This
  825. + classification reflects but is not equal to the number of
  826. + instructions retired per cycle.
  827. +
  828. + The scheduler uses the classification value to improve the placement
  829. + of tasks.
  830. +
  831. endmenu
  832. #
  833. --
  834. 2.39.2
  835. From 11597284e5e583ef060ff6ccc4a3aa619c672d26 Mon Sep 17 00:00:00 2001
  836. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  837. Date: Mon, 6 Feb 2023 21:10:43 -0800
  838. Subject: [PATCH] sched: Add interfaces for IPC classes
  839. Add the interfaces that architectures shall implement to convey the data
  840. to support IPC classes.
  841. arch_update_ipcc() updates the IPC classification of the current task as
  842. given by hardware.
  843. arch_get_ipcc_score() provides a performance score for a given IPC class
  844. when placed on a specific CPU. Higher scores indicate higher performance.
  845. When a driver or equivalent enablement code has configured the necessary
  846. hardware to support IPC classes, it should call sched_enable_ipc_classes()
  847. to notify the scheduler that it can start using IPC classes data.
  848. The number of classes and the score of each class of task are determined
  849. by hardware.
  850. Cc: Ben Segall <bsegall@google.com>
  851. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  852. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  853. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  854. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  855. Cc: Len Brown <len.brown@intel.com>
  856. Cc: Lukasz Luba <lukasz.luba@arm.com>
  857. Cc: Mel Gorman <mgorman@suse.de>
  858. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  859. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  860. Cc: Steven Rostedt <rostedt@goodmis.org>
  861. Cc: Tim C. Chen <tim.c.chen@intel.com>
  862. Cc: Valentin Schneider <vschneid@redhat.com>
  863. Cc: x86@kernel.org
  864. Cc: linux-pm@vger.kernel.org
  865. Cc: linux-kernel@vger.kernel.org
  866. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  867. Patchset: intel-thread-director
  868. ---
  869. include/linux/sched/topology.h | 6 ++++
  870. kernel/sched/sched.h | 66 ++++++++++++++++++++++++++++++++++
  871. kernel/sched/topology.c | 9 +++++
  872. 3 files changed, 81 insertions(+)
  873. diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
  874. index 816df6cc444e1..5b084d3c9ad12 100644
  875. --- a/include/linux/sched/topology.h
  876. +++ b/include/linux/sched/topology.h
  877. @@ -280,4 +280,10 @@ static inline int task_node(const struct task_struct *p)
  878. return cpu_to_node(task_cpu(p));
  879. }
  880. +#ifdef CONFIG_IPC_CLASSES
  881. +extern void sched_enable_ipc_classes(void);
  882. +#else
  883. +static inline void sched_enable_ipc_classes(void) { }
  884. +#endif
  885. +
  886. #endif /* _LINUX_SCHED_TOPOLOGY_H */
  887. diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
  888. index 771f8ddb70533..7ab65d3feaa16 100644
  889. --- a/kernel/sched/sched.h
  890. +++ b/kernel/sched/sched.h
  891. @@ -2526,6 +2526,72 @@ void arch_scale_freq_tick(void)
  892. }
  893. #endif
  894. +#ifdef CONFIG_IPC_CLASSES
  895. +DECLARE_STATIC_KEY_FALSE(sched_ipcc);
  896. +
  897. +static inline bool sched_ipcc_enabled(void)
  898. +{
  899. + return static_branch_unlikely(&sched_ipcc);
  900. +}
  901. +
  902. +#ifndef arch_update_ipcc
  903. +/**
  904. + * arch_update_ipcc() - Update the IPC class of the current task
  905. + * @curr: The current task
  906. + *
  907. + * Request that the IPC classification of @curr is updated.
  908. + *
  909. + * Returns: none
  910. + */
  911. +static __always_inline
  912. +void arch_update_ipcc(struct task_struct *curr)
  913. +{
  914. +}
  915. +#endif
  916. +
  917. +#ifndef arch_get_ipcc_score
  918. +
  919. +#define SCHED_IPCC_SCORE_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)
  920. +/**
  921. + * arch_get_ipcc_score() - Get the IPC score of a class of task
  922. + * @ipcc: The IPC class
  923. + * @cpu: A CPU number
  924. + *
  925. + * The IPC performance scores reflects (but it is not identical to) the number
  926. + * of instructions retired per cycle for a given IPC class. It is a linear and
  927. + * abstract metric. Higher scores reflect better performance.
  928. + *
  929. + * The IPC score can be normalized with respect to the class, i, with the
  930. + * highest IPC score on the CPU, c, with highest performance:
  931. + *
  932. + * IPC(i, c)
  933. + * ------------------------------------ * SCHED_IPCC_SCORE_SCALE
  934. + * max(IPC(i, c) : (i, c))
  935. + *
  936. + * Scheduling schemes that want to use the IPC score along with other
  937. + * normalized metrics for scheduling (e.g., CPU capacity) may need to normalize
  938. + * it.
  939. + *
  940. + * Other scheduling schemes (e.g., asym_packing) do not need normalization.
  941. + *
  942. + * Returns the performance score of an IPC class, @ipcc, when running on @cpu.
  943. + * Error when either @ipcc or @cpu are invalid.
  944. + */
  945. +static __always_inline
  946. +unsigned long arch_get_ipcc_score(unsigned short ipcc, int cpu)
  947. +{
  948. + return SCHED_IPCC_SCORE_SCALE;
  949. +}
  950. +#endif
  951. +#else /* CONFIG_IPC_CLASSES */
  952. +
  953. +#define arch_get_ipcc_score(ipcc, cpu) (-EINVAL)
  954. +#define arch_update_ipcc(curr)
  955. +
  956. +static inline bool sched_ipcc_enabled(void) { return false; }
  957. +
  958. +#endif /* CONFIG_IPC_CLASSES */
  959. +
  960. #ifndef arch_scale_freq_capacity
  961. /**
  962. * arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
  963. diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
  964. index 8739c2a5a54ea..60e03d15f58ca 100644
  965. --- a/kernel/sched/topology.c
  966. +++ b/kernel/sched/topology.c
  967. @@ -670,6 +670,15 @@ DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
  968. DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
  969. DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
  970. +#ifdef CONFIG_IPC_CLASSES
  971. +DEFINE_STATIC_KEY_FALSE(sched_ipcc);
  972. +
  973. +void sched_enable_ipc_classes(void)
  974. +{
  975. + static_branch_enable_cpuslocked(&sched_ipcc);
  976. +}
  977. +#endif
  978. +
  979. static void update_top_cache_domain(int cpu)
  980. {
  981. struct sched_domain_shared *sds = NULL;
  982. --
  983. 2.39.2
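To make the normalization in the arch_get_ipcc_score() kernel-doc above concrete, here is a minimal userspace sketch. It is not part of the patchset; it assumes SCHED_FIXEDPOINT_SHIFT of 10 (the kernel's fixed-point convention) and made-up IPC values.

#include <stdio.h>

#define SCHED_FIXEDPOINT_SHIFT 10
#define SCHED_IPCC_SCORE_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)

/* Normalize a raw IPC value against the best (class, CPU) combination. */
static long normalized_ipcc_score(long ipc, long max_ipc)
{
        return ipc * SCHED_IPCC_SCORE_SCALE / max_ipc;
}

int main(void)
{
        /* A class retiring 3 instructions/cycle on a part whose best
         * (class, CPU) pair retires 4: 3/4 in fixed point. */
        printf("%ld\n", normalized_ipcc_score(3, 4)); /* prints 768 */
        return 0;
}

With this scaling, a score of SCHED_IPCC_SCORE_SCALE (1024) marks the best class on the best CPU, which is also what the default stub above returns.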
  984. From db0b0e36404f9b091b52d5c1798ca3e875bf3728 Mon Sep 17 00:00:00 2001
  985. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  986. Date: Mon, 6 Feb 2023 21:10:44 -0800
  987. Subject: [PATCH] sched/core: Initialize the IPC class of a new task
  988. New tasks shall start life as unclassified. They will be classified by
  989. hardware when they run.
  990. Cc: Ben Segall <bsegall@google.com>
  991. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  992. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  993. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  994. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  995. Cc: Len Brown <len.brown@intel.com>
  996. Cc: Lukasz Luba <lukasz.luba@arm.com>
  997. Cc: Mel Gorman <mgorman@suse.de>
  998. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  999. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  1000. Cc: Steven Rostedt <rostedt@goodmis.org>
  1001. Cc: Tim C. Chen <tim.c.chen@intel.com>
  1002. Cc: Valentin Schneider <vschneid@redhat.com>
  1003. Cc: x86@kernel.org
  1004. Cc: linux-pm@vger.kernel.org
  1005. Cc: linux-kernel@vger.kernel.org
  1006. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1007. Patchset: intel-thread-director
  1008. ---
  1009. kernel/sched/core.c | 3 +++
  1010. 1 file changed, 3 insertions(+)
  1011. diff --git a/kernel/sched/core.c b/kernel/sched/core.c
  1012. index 2a4918a1faa9e..325b1d3cf7a82 100644
  1013. --- a/kernel/sched/core.c
  1014. +++ b/kernel/sched/core.c
  1015. @@ -4424,6 +4424,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
  1016. p->se.prev_sum_exec_runtime = 0;
  1017. p->se.nr_migrations = 0;
  1018. p->se.vruntime = 0;
  1019. +#ifdef CONFIG_IPC_CLASSES
  1020. + p->ipcc = IPC_CLASS_UNCLASSIFIED;
  1021. +#endif
  1022. INIT_LIST_HEAD(&p->se.group_node);
  1023. #ifdef CONFIG_FAIR_GROUP_SCHED
  1024. --
  1025. 2.39.2
  1026. From 8250fc7b2d160a0638603d7575b0516a0ff1340e Mon Sep 17 00:00:00 2001
  1027. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1028. Date: Mon, 6 Feb 2023 21:10:45 -0800
  1029. Subject: [PATCH] sched/core: Add user_tick as argument to scheduler_tick()
  1030. Differentiate between user and kernel ticks so that the scheduler updates
  1031. the IPC class of the current task during the former.
  1032. Cc: Ben Segall <bsegall@google.com>
  1033. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  1034. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  1035. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  1036. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  1037. Cc: Len Brown <len.brown@intel.com>
  1038. Cc: Lukasz Luba <lukasz.luba@arm.com>
  1039. Cc: Mel Gorman <mgorman@suse.de>
  1040. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  1041. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  1042. Cc: Steven Rostedt <rostedt@goodmis.org>
  1043. Cc: Tim C. Chen <tim.c.chen@intel.com>
  1044. Cc: Valentin Schneider <vschneid@redhat.com>
  1045. Cc: x86@kernel.org
  1046. Cc: linux-pm@vger.kernel.org
  1047. Cc: linux-kernel@vger.kernel.org
  1048. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1049. Patchset: intel-thread-director
  1050. ---
  1051. include/linux/sched.h | 2 +-
  1052. kernel/sched/core.c | 2 +-
  1053. kernel/time/timer.c | 2 +-
  1054. 3 files changed, 3 insertions(+), 3 deletions(-)
  1055. diff --git a/include/linux/sched.h b/include/linux/sched.h
  1056. index f292942178850..4f96c3dd59d0b 100644
  1057. --- a/include/linux/sched.h
  1058. +++ b/include/linux/sched.h
  1059. @@ -293,7 +293,7 @@ enum {
  1060. TASK_COMM_LEN = 16,
  1061. };
  1062. -extern void scheduler_tick(void);
  1063. +extern void scheduler_tick(bool user_tick);
  1064. #define MAX_SCHEDULE_TIMEOUT LONG_MAX
  1065. diff --git a/kernel/sched/core.c b/kernel/sched/core.c
  1066. index 325b1d3cf7a82..b438fc79f868f 100644
  1067. --- a/kernel/sched/core.c
  1068. +++ b/kernel/sched/core.c
  1069. @@ -5550,7 +5550,7 @@ static inline u64 cpu_resched_latency(struct rq *rq) { return 0; }
  1070. * This function gets called by the timer code, with HZ frequency.
  1071. * We call it with interrupts disabled.
  1072. */
  1073. -void scheduler_tick(void)
  1074. +void scheduler_tick(bool user_tick)
  1075. {
  1076. int cpu = smp_processor_id();
  1077. struct rq *rq = cpu_rq(cpu);
  1078. diff --git a/kernel/time/timer.c b/kernel/time/timer.c
  1079. index 63a8ce7177dd4..e15e24105891f 100644
  1080. --- a/kernel/time/timer.c
  1081. +++ b/kernel/time/timer.c
  1082. @@ -2073,7 +2073,7 @@ void update_process_times(int user_tick)
  1083. if (in_irq())
  1084. irq_work_tick();
  1085. #endif
  1086. - scheduler_tick();
  1087. + scheduler_tick(user_tick);
  1088. if (IS_ENABLED(CONFIG_POSIX_TIMERS))
  1089. run_posix_cpu_timers();
  1090. }
  1091. --
  1092. 2.39.2
  1093. From 7151037d127499dfdb328d84ffc2f435aa3471ce Mon Sep 17 00:00:00 2001
  1094. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1095. Date: Mon, 6 Feb 2023 21:10:46 -0800
  1096. Subject: [PATCH] sched/core: Update the IPC class of the current task
  1097. When supported, hardware monitors the instruction stream to classify the
1098. current task. Hence, at each user tick, we are ready to read the most
  1099. recent classification result for the current task.
  1100. Cc: Ben Segall <bsegall@google.com>
  1101. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  1102. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  1103. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  1104. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  1105. Cc: Len Brown <len.brown@intel.com>
  1106. Cc: Lukasz Luba <lukasz.luba@arm.com>
  1107. Cc: Mel Gorman <mgorman@suse.de>
  1108. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  1109. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  1110. Cc: Steven Rostedt <rostedt@goodmis.org>
  1111. Cc: Tim C. Chen <tim.c.chen@intel.com>
  1112. Cc: Valentin Schneider <vschneid@redhat.com>
  1113. Cc: x86@kernel.org
  1114. Cc: linux-pm@vger.kernel.org
  1115. Cc: linux-kernel@vger.kernel.org
  1116. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1117. Patchset: intel-thread-director
  1118. ---
  1119. kernel/sched/core.c | 3 +++
  1120. 1 file changed, 3 insertions(+)
  1121. diff --git a/kernel/sched/core.c b/kernel/sched/core.c
  1122. index b438fc79f868f..0ab39cc055c77 100644
  1123. --- a/kernel/sched/core.c
  1124. +++ b/kernel/sched/core.c
  1125. @@ -5562,6 +5562,9 @@ void scheduler_tick(bool user_tick)
  1126. if (housekeeping_cpu(cpu, HK_TYPE_TICK))
  1127. arch_scale_freq_tick();
  1128. + if (sched_ipcc_enabled() && user_tick)
  1129. + arch_update_ipcc(curr);
  1130. +
  1131. sched_clock_tick();
  1132. rq_lock(rq, &rf);
  1133. --
  1134. 2.39.2
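The gating added to scheduler_tick() above can be modeled in a few lines of plain C. This is illustrative only; the two stand-ins below take the place of sched_ipcc_enabled() and arch_update_ipcc().

#include <stdbool.h>
#include <stdio.h>

static bool ipcc_enabled = true;        /* models the sched_ipcc static key */

static void update_classification(void) /* models arch_update_ipcc(curr) */
{
        puts("reading hardware classification");
}

static void tick(bool user_tick)
{
        /* Classify only on user ticks: the hardware sample then reflects
         * the user code the task was just executing. */
        if (ipcc_enabled && user_tick)
                update_classification();
}

int main(void)
{
        tick(true);  /* user tick: classification updated */
        tick(false); /* kernel tick: skipped */
        return 0;
}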
  1135. From 7bd90996a0cfd74c641d808c8975ab8aa5796572 Mon Sep 17 00:00:00 2001
  1136. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1137. Date: Mon, 6 Feb 2023 21:10:47 -0800
  1138. Subject: [PATCH] sched/fair: Collect load-balancing stats for IPC classes
1139. When selecting a busiest scheduling group, the IPC class of the current
1140. task can be used to select between two scheduling groups of type
1141. asym_packing or fully_busy that are otherwise identical.
  1142. Compute the IPC class performance score for a scheduling group. It
  1143. is the sum of the scores of the current tasks of all the runqueues.
  1144. Also, keep track of the class of the task with the lowest IPC class score
  1145. in the scheduling group.
  1146. These two metrics will be used during idle load balancing to compute the
  1147. current and the prospective IPC class score of a scheduling group.
  1148. Cc: Ben Segall <bsegall@google.com>
  1149. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  1150. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  1151. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  1152. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  1153. Cc: Len Brown <len.brown@intel.com>
  1154. Cc: Lukasz Luba <lukasz.luba@arm.com>
  1155. Cc: Mel Gorman <mgorman@suse.de>
  1156. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  1157. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  1158. Cc: Steven Rostedt <rostedt@goodmis.org>
  1159. Cc: Tim C. Chen <tim.c.chen@intel.com>
  1160. Cc: Valentin Schneider <vschneid@redhat.com>
  1161. Cc: x86@kernel.org
  1162. Cc: linux-pm@vger.kernel.org
  1163. Cc: linux-kernel@vger.kernel.org
  1164. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1165. Patchset: intel-thread-director
  1166. ---
  1167. kernel/sched/fair.c | 61 +++++++++++++++++++++++++++++++++++++++++++++
  1168. 1 file changed, 61 insertions(+)
  1169. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  1170. index e5079ee882ff8..a418164953c36 100644
  1171. --- a/kernel/sched/fair.c
  1172. +++ b/kernel/sched/fair.c
  1173. @@ -8767,6 +8767,11 @@ struct sg_lb_stats {
  1174. unsigned int nr_numa_running;
  1175. unsigned int nr_preferred_running;
  1176. #endif
  1177. +#ifdef CONFIG_IPC_CLASSES
  1178. + unsigned long min_score; /* Min(score(rq->curr->ipcc)) */
  1179. + unsigned short min_ipcc; /* Class of the task with the minimum IPCC score in the rq */
  1180. + unsigned long sum_score; /* Sum(score(rq->curr->ipcc)) */
  1181. +#endif
  1182. };
  1183. /*
  1184. @@ -9110,6 +9115,59 @@ group_type group_classify(unsigned int imbalance_pct,
  1185. return group_has_spare;
  1186. }
  1187. +#ifdef CONFIG_IPC_CLASSES
  1188. +static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
  1189. +{
  1190. + /* All IPCC stats have been set to zero in update_sg_lb_stats(). */
  1191. + sgs->min_score = ULONG_MAX;
  1192. +}
  1193. +
  1194. +/* Called only if cpu_of(@rq) is not idle and has tasks running. */
  1195. +static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
  1196. + struct rq *rq)
  1197. +{
  1198. + struct task_struct *curr;
  1199. + unsigned short ipcc;
  1200. + unsigned long score;
  1201. +
  1202. + if (!sched_ipcc_enabled())
  1203. + return;
  1204. +
  1205. + curr = rcu_dereference(rq->curr);
  1206. + if (!curr || (curr->flags & PF_EXITING) || is_idle_task(curr) ||
  1207. + task_is_realtime(curr) ||
  1208. + !cpumask_test_cpu(dst_cpu, curr->cpus_ptr))
  1209. + return;
  1210. +
  1211. + ipcc = curr->ipcc;
  1212. + score = arch_get_ipcc_score(ipcc, cpu_of(rq));
  1213. +
  1214. + /*
  1215. + * Ignore tasks with invalid scores. When finding the busiest group, we
  1216. + * prefer those with higher sum_score. This group will not be selected.
  1217. + */
  1218. + if (IS_ERR_VALUE(score))
  1219. + return;
  1220. +
  1221. + sgs->sum_score += score;
  1222. +
  1223. + if (score < sgs->min_score) {
  1224. + sgs->min_score = score;
  1225. + sgs->min_ipcc = ipcc;
  1226. + }
  1227. +}
  1228. +
  1229. +#else /* CONFIG_IPC_CLASSES */
  1230. +static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
  1231. + struct rq *rq)
  1232. +{
  1233. +}
  1234. +
  1235. +static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
  1236. +{
  1237. +}
  1238. +#endif /* CONFIG_IPC_CLASSES */
  1239. +
  1240. /**
  1241. * asym_smt_can_pull_tasks - Check whether the load balancing CPU can pull tasks
  1242. * @dst_cpu: Destination CPU of the load balancing
  1243. @@ -9202,6 +9260,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
  1244. int i, nr_running, local_group;
  1245. memset(sgs, 0, sizeof(*sgs));
  1246. + init_rq_ipcc_stats(sgs);
  1247. local_group = group == sds->local;
  1248. @@ -9251,6 +9310,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
  1249. if (sgs->group_misfit_task_load < load)
  1250. sgs->group_misfit_task_load = load;
  1251. }
  1252. +
  1253. + update_sg_lb_ipcc_stats(env->dst_cpu, sgs, rq);
  1254. }
  1255. sgs->group_capacity = group->sgc->capacity;
  1256. --
  1257. 2.39.2
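A compact model of the accumulation done in update_sg_lb_ipcc_stats(), one call per busy runqueue in the group. This is a sketch with invented class numbers and scores, not kernel code.

#include <limits.h>
#include <stdio.h>

struct sg_stats {
        unsigned long sum_score;
        unsigned long min_score;
        unsigned short min_ipcc;
};

static void account(struct sg_stats *sgs, unsigned short ipcc,
                    unsigned long score)
{
        sgs->sum_score += score;
        if (score < sgs->min_score) {
                sgs->min_score = score;
                sgs->min_ipcc = ipcc;
        }
}

int main(void)
{
        struct sg_stats sgs = { .min_score = ULONG_MAX };

        account(&sgs, 1, 768);  /* rq0: current task of class 1 */
        account(&sgs, 2, 1024); /* rq1: current task of class 2 */
        printf("sum=%lu min=%lu min_class=%u\n",
               sgs.sum_score, sgs.min_score, sgs.min_ipcc); /* 1792 768 1 */
        return 0;
}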
  1258. From dcdc8c47500008e304dab90c7546127c8a056752 Mon Sep 17 00:00:00 2001
  1259. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1260. Date: Mon, 6 Feb 2023 21:10:48 -0800
  1261. Subject: [PATCH] sched/fair: Compute IPC class scores for load balancing
  1262. Compute the joint total (both current and prospective) IPC class score of
  1263. a scheduling group and the local scheduling group.
  1264. These IPCC statistics are used during idle load balancing. The candidate
  1265. scheduling group will have one fewer busy CPU after load balancing. This
  1266. observation is important for cores with SMT support.
  1267. The IPCC score of scheduling groups composed of SMT siblings needs to
  1268. consider that the siblings share CPU resources. When computing the total
1269. IPCC score of the scheduling group, divide the score of each sibling by the
  1270. number of busy siblings.
  1271. Collect IPCC statistics for asym_packing and fully_busy scheduling groups.
  1272. When picking a busiest group, they are used to break ties between otherwise
  1273. identical groups.
  1274. Cc: Ben Segall <bsegall@google.com>
  1275. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  1276. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  1277. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  1278. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  1279. Cc: Len Brown <len.brown@intel.com>
  1280. Cc: Lukasz Luba <lukasz.luba@arm.com>
  1281. Cc: Mel Gorman <mgorman@suse.de>
  1282. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  1283. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  1284. Cc: Steven Rostedt <rostedt@goodmis.org>
  1285. Cc: Tim C. Chen <tim.c.chen@intel.com>
  1286. Cc: Valentin Schneider <vschneid@redhat.com>
  1287. Cc: x86@kernel.org
  1288. Cc: linux-pm@vger.kernel.org
  1289. Cc: linux-kernel@vger.kernel.org
  1290. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1291. Patchset: intel-thread-director
  1292. ---
  1293. kernel/sched/fair.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
  1294. 1 file changed, 68 insertions(+)
  1295. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  1296. index a418164953c36..ae0c908be707e 100644
  1297. --- a/kernel/sched/fair.c
  1298. +++ b/kernel/sched/fair.c
  1299. @@ -8771,6 +8771,8 @@ struct sg_lb_stats {
  1300. unsigned long min_score; /* Min(score(rq->curr->ipcc)) */
  1301. unsigned short min_ipcc; /* Class of the task with the minimum IPCC score in the rq */
  1302. unsigned long sum_score; /* Sum(score(rq->curr->ipcc)) */
  1303. + long ipcc_score_after; /* Prospective IPCC score after load balancing */
  1304. + unsigned long ipcc_score_before; /* IPCC score before load balancing */
  1305. #endif
  1306. };
  1307. @@ -9157,6 +9159,62 @@ static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
  1308. }
  1309. }
  1310. +static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
  1311. + struct sched_group *sg,
  1312. + struct lb_env *env)
  1313. +{
  1314. + unsigned long score_on_dst_cpu, before;
  1315. + int busy_cpus;
  1316. + long after;
  1317. +
  1318. + if (!sched_ipcc_enabled())
  1319. + return;
  1320. +
  1321. + /*
  1322. + * IPCC scores are only useful during idle load balancing. For now,
  1323. + * only asym_packing uses IPCC scores.
  1324. + */
  1325. + if (!(env->sd->flags & SD_ASYM_PACKING) ||
  1326. + env->idle == CPU_NOT_IDLE)
  1327. + return;
  1328. +
  1329. + /*
  1330. + * IPCC scores are used to break ties only between these types of
  1331. + * groups.
  1332. + */
  1333. + if (sgs->group_type != group_fully_busy &&
  1334. + sgs->group_type != group_asym_packing)
  1335. + return;
  1336. +
  1337. + busy_cpus = sgs->group_weight - sgs->idle_cpus;
  1338. +
  1339. + /* No busy CPUs in the group. No tasks to move. */
  1340. + if (!busy_cpus)
  1341. + return;
  1342. +
  1343. + score_on_dst_cpu = arch_get_ipcc_score(sgs->min_ipcc, env->dst_cpu);
  1344. +
  1345. + /*
  1346. + * Do not use IPC scores. sgs::ipcc_score_{after, before} will be zero
  1347. + * and not used.
  1348. + */
  1349. + if (IS_ERR_VALUE(score_on_dst_cpu))
  1350. + return;
  1351. +
  1352. + before = sgs->sum_score;
  1353. + after = before - sgs->min_score;
  1354. +
  1355. + /* SMT siblings share throughput. */
  1356. + if (busy_cpus > 1 && sg->flags & SD_SHARE_CPUCAPACITY) {
  1357. + before /= busy_cpus;
  1358. + /* One sibling will become idle after load balance. */
  1359. + after /= busy_cpus - 1;
  1360. + }
  1361. +
  1362. + sgs->ipcc_score_after = after + score_on_dst_cpu;
  1363. + sgs->ipcc_score_before = before;
  1364. +}
  1365. +
  1366. #else /* CONFIG_IPC_CLASSES */
  1367. static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
  1368. struct rq *rq)
  1369. @@ -9166,6 +9224,13 @@ static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
  1370. static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
  1371. {
  1372. }
  1373. +
  1374. +static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
  1375. + struct sched_group *sg,
  1376. + struct lb_env *env)
  1377. +{
  1378. +}
  1379. +
  1380. #endif /* CONFIG_IPC_CLASSES */
  1381. /**
  1382. @@ -9327,6 +9392,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
  1383. sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs);
  1384. + if (!local_group)
  1385. + update_sg_lb_stats_scores(sgs, group, env);
  1386. +
  1387. /* Computing avg_load makes sense only when group is overloaded */
  1388. if (sgs->group_type == group_overloaded)
  1389. sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) /
  1390. --
  1391. 2.39.2
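The before/after arithmetic above, including the SMT division, can be checked with a standalone sketch. The scores are invented for illustration.

#include <stdio.h>

static void group_scores(unsigned long sum, unsigned long min,
                         unsigned long dst_score, int busy_cpus, int smt,
                         unsigned long *before, long *after)
{
        *before = sum;
        *after = (long)(sum - min);

        /* SMT siblings share the core: scale by the number of busy
         * siblings, one of which becomes idle after the balance. */
        if (busy_cpus > 1 && smt) {
                *before /= busy_cpus;
                *after /= busy_cpus - 1;
        }
        *after += dst_score;
}

int main(void)
{
        unsigned long before;
        long after;

        /* Two busy SMT siblings scoring 768 and 1024; the destination CPU
         * scores the weakest class at 1024. */
        group_scores(768 + 1024, 768, 1024, 2, 1, &before, &after);
        printf("before=%lu after=%ld\n", before, after); /* 896 and 2048 */
        return 0;
}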
  1392. From f5899b589a3df28df698309c8529262012cbfcbc Mon Sep 17 00:00:00 2001
  1393. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1394. Date: Mon, 6 Feb 2023 21:10:49 -0800
  1395. Subject: [PATCH] sched/fair: Use IPCC stats to break ties between asym_packing
  1396. sched groups
1397. As it iterates, update_sd_pick_busiest() keeps selecting, as busiest,
  1398. sched groups of identical priority. Since both groups have the same
  1399. priority, either group is a good choice. The IPCC statistics provide a
  1400. measure of the throughput before and after load balance. Use them to
  1401. pick a busiest scheduling group from otherwise identical asym_packing
  1402. scheduling groups.
  1403. Pick as busiest the scheduling group that yields a higher IPCC score
  1404. after load balancing.
  1405. Cc: Ben Segall <bsegall@google.com>
  1406. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  1407. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  1408. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  1409. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  1410. Cc: Len Brown <len.brown@intel.com>
  1411. Cc: Lukasz Luba <lukasz.luba@arm.com>
  1412. Cc: Mel Gorman <mgorman@suse.de>
  1413. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  1414. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  1415. Cc: Steven Rostedt <rostedt@goodmis.org>
  1416. Cc: Tim C. Chen <tim.c.chen@intel.com>
  1417. Cc: Valentin Schneider <vschneid@redhat.com>
  1418. Cc: x86@kernel.org
  1419. Cc: linux-pm@vger.kernel.org
  1420. Cc: linux-kernel@vger.kernel.org
  1421. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1422. Patchset: intel-thread-director
  1423. ---
  1424. kernel/sched/fair.c | 72 +++++++++++++++++++++++++++++++++++++++++++++
  1425. 1 file changed, 72 insertions(+)
  1426. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  1427. index ae0c908be707e..cffb435e2b1c4 100644
  1428. --- a/kernel/sched/fair.c
  1429. +++ b/kernel/sched/fair.c
  1430. @@ -9215,6 +9215,60 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
  1431. sgs->ipcc_score_before = before;
  1432. }
  1433. +/**
  1434. + * sched_asym_ipcc_prefer - Select a sched group based on its IPCC score
  1435. + * @a: Load balancing statistics of a sched group
  1436. + * @b: Load balancing statistics of a second sched group
  1437. + *
  1438. + * Returns: true if @a has a higher IPCC score than @b after load balance.
  1439. + * False otherwise.
  1440. + */
  1441. +static bool sched_asym_ipcc_prefer(struct sg_lb_stats *a,
  1442. + struct sg_lb_stats *b)
  1443. +{
  1444. + if (!sched_ipcc_enabled())
  1445. + return false;
  1446. +
  1447. + /* @a increases overall throughput after load balance. */
  1448. + if (a->ipcc_score_after > b->ipcc_score_after)
  1449. + return true;
  1450. +
  1451. + /*
  1452. + * If @a and @b yield the same overall throughput, pick @a if
  1453. + * its current throughput is lower than that of @b.
  1454. + */
  1455. + if (a->ipcc_score_after == b->ipcc_score_after)
  1456. + return a->ipcc_score_before < b->ipcc_score_before;
  1457. +
  1458. + return false;
  1459. +}
  1460. +
  1461. +/**
  1462. + * sched_asym_ipcc_pick - Select a sched group based on its IPCC score
  1463. + * @a: A scheduling group
  1464. + * @b: A second scheduling group
  1465. + * @a_stats: Load balancing statistics of @a
  1466. + * @b_stats: Load balancing statistics of @b
  1467. + *
  1468. + * Returns: true if @a has the same priority and @a has tasks with IPC classes
  1469. + * that yield higher overall throughput after load balance. False otherwise.
  1470. + */
  1471. +static bool sched_asym_ipcc_pick(struct sched_group *a,
  1472. + struct sched_group *b,
  1473. + struct sg_lb_stats *a_stats,
  1474. + struct sg_lb_stats *b_stats)
  1475. +{
  1476. + /*
  1477. + * Only use the class-specific preference selection if both sched
  1478. + * groups have the same priority.
  1479. + */
  1480. + if (arch_asym_cpu_priority(a->asym_prefer_cpu) !=
  1481. + arch_asym_cpu_priority(b->asym_prefer_cpu))
  1482. + return false;
  1483. +
  1484. + return sched_asym_ipcc_prefer(a_stats, b_stats);
  1485. +}
  1486. +
  1487. #else /* CONFIG_IPC_CLASSES */
  1488. static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
  1489. struct rq *rq)
  1490. @@ -9231,6 +9285,14 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
  1491. {
  1492. }
  1493. +static bool sched_asym_ipcc_pick(struct sched_group *a,
  1494. + struct sched_group *b,
  1495. + struct sg_lb_stats *a_stats,
  1496. + struct sg_lb_stats *b_stats)
  1497. +{
  1498. + return false;
  1499. +}
  1500. +
  1501. #endif /* CONFIG_IPC_CLASSES */
  1502. /**
  1503. @@ -9466,6 +9528,16 @@ static bool update_sd_pick_busiest(struct lb_env *env,
  1504. /* Prefer to move from lowest priority CPU's work */
  1505. if (sched_asym_prefer(sg->asym_prefer_cpu, sds->busiest->asym_prefer_cpu))
  1506. return false;
  1507. +
  1508. + /*
  1509. + * Unlike other callers of sched_asym_prefer(), here both @sg
  1510. + * and @sds::busiest have tasks running. When they have equal
  1511. + * priority, their IPC class scores can be used to select a
  1512. + * better busiest.
  1513. + */
  1514. + if (sched_asym_ipcc_pick(sds->busiest, sg, &sds->busiest_stat, sgs))
  1515. + return false;
  1516. +
  1517. break;
  1518. case group_misfit_task:
  1519. --
  1520. 2.39.2
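A minimal demonstration of the tie-break semantics of sched_asym_ipcc_prefer(), with made-up values: equal prospective throughput, so the group currently doing worse wins.

#include <stdbool.h>
#include <stdio.h>

/* Minimal stand-ins for the two sg_lb_stats fields the tie-break uses. */
struct stats { long after; unsigned long before; };

static bool prefer(const struct stats *a, const struct stats *b)
{
        if (a->after > b->after)
                return true;
        if (a->after == b->after)
                return a->before < b->before;
        return false;
}

int main(void)
{
        struct stats a = { .after = 900, .before = 700 };
        struct stats b = { .after = 900, .before = 500 };

        printf("%s\n", prefer(&a, &b) ? "pick a" : "pick b"); /* pick b */
        return 0;
}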
  1521. From 516bec260bf73b1f5c078755b96593849fd166d3 Mon Sep 17 00:00:00 2001
  1522. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1523. Date: Mon, 6 Feb 2023 21:10:50 -0800
  1524. Subject: [PATCH] sched/fair: Use IPCC stats to break ties between fully_busy
  1525. SMT groups
1526. IPCC statistics are used during idle load balancing. After balancing, one
1527. of the siblings of an SMT core will become idle. The rest of the busy
  1528. siblings will enjoy increased throughput. The IPCC statistics provide
  1529. a measure of the increased throughput. Use them to pick a busiest group
1530. from otherwise identical fully_busy scheduling groups (whose avg_load is
1531. equal, and zero).
  1532. Using IPCC scores to break ties with non-SMT fully_busy sched groups
  1533. is not necessary. SMT sched groups always need more help.
  1534. Add a stub sched_asym_ipcc_prefer() for !CONFIG_IPC_CLASSES.
  1535. Cc: Ben Segall <bsegall@google.com>
  1536. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  1537. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  1538. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  1539. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  1540. Cc: Len Brown <len.brown@intel.com>
  1541. Cc: Lukasz Luba <lukasz.luba@arm.com>
  1542. Cc: Mel Gorman <mgorman@suse.de>
  1543. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  1544. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  1545. Cc: Steven Rostedt <rostedt@goodmis.org>
  1546. Cc: Tim C. Chen <tim.c.chen@intel.com>
  1547. Cc: Valentin Schneider <vschneid@redhat.com>
  1548. Cc: x86@kernel.org
  1549. Cc: linux-pm@vger.kernel.org
  1550. Cc: linux-kernel@vger.kernel.org
  1551. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1552. Patchset: intel-thread-director
  1553. ---
  1554. kernel/sched/fair.c | 23 ++++++++++++++++++++---
  1555. 1 file changed, 20 insertions(+), 3 deletions(-)
  1556. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  1557. index cffb435e2b1c4..0996339df429b 100644
  1558. --- a/kernel/sched/fair.c
  1559. +++ b/kernel/sched/fair.c
  1560. @@ -9285,6 +9285,12 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
  1561. {
  1562. }
  1563. +static bool sched_asym_ipcc_prefer(struct sg_lb_stats *a,
  1564. + struct sg_lb_stats *b)
  1565. +{
  1566. + return false;
  1567. +}
  1568. +
  1569. static bool sched_asym_ipcc_pick(struct sched_group *a,
  1570. struct sched_group *b,
  1571. struct sg_lb_stats *a_stats,
  1572. @@ -9568,10 +9574,21 @@ static bool update_sd_pick_busiest(struct lb_env *env,
  1573. if (sgs->avg_load == busiest->avg_load) {
  1574. /*
  1575. * SMT sched groups need more help than non-SMT groups.
  1576. - * If @sg happens to also be SMT, either choice is good.
  1577. */
  1578. - if (sds->busiest->flags & SD_SHARE_CPUCAPACITY)
  1579. - return false;
  1580. + if (sds->busiest->flags & SD_SHARE_CPUCAPACITY) {
  1581. + if (!(sg->flags & SD_SHARE_CPUCAPACITY))
  1582. + return false;
  1583. +
  1584. + /*
  1585. + * Between two SMT groups, use IPCC scores to pick the
  1586. + * one that would improve throughput the most (only
  1587. + * asym_packing uses IPCC scores for now).
  1588. + */
  1589. + if (sched_ipcc_enabled() &&
  1590. + env->sd->flags & SD_ASYM_PACKING &&
  1591. + sched_asym_ipcc_prefer(busiest, sgs))
  1592. + return false;
  1593. + }
  1594. }
  1595. break;
  1596. --
  1597. 2.39.2
  1598. From 442df79e3613c6db2f01a8489177d0edd366309d Mon Sep 17 00:00:00 2001
  1599. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1600. Date: Mon, 6 Feb 2023 21:10:51 -0800
  1601. Subject: [PATCH] sched/fair: Use IPCC scores to select a busiest runqueue
1602. For two runqueues of equal priority and an equal number of running tasks,
  1603. select the one whose current task would have the highest IPC class score
  1604. if placed on the destination CPU.
  1605. For now, use IPCC scores only for scheduling domains with the
  1606. SD_ASYM_PACKING flag.
  1607. Cc: Ben Segall <bsegall@google.com>
  1608. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  1609. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  1610. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  1611. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  1612. Cc: Len Brown <len.brown@intel.com>
  1613. Cc: Lukasz Luba <lukasz.luba@arm.com>
  1614. Cc: Mel Gorman <mgorman@suse.de>
  1615. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  1616. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  1617. Cc: Steven Rostedt <rostedt@goodmis.org>
  1618. Cc: Tim C. Chen <tim.c.chen@intel.com>
  1619. Cc: Valentin Schneider <vschneid@redhat.com>
  1620. Cc: x86@kernel.org
  1621. Cc: linux-pm@vger.kernel.org
  1622. Cc: linux-kernel@vger.kernel.org
  1623. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1624. Patchset: intel-thread-director
  1625. ---
  1626. kernel/sched/fair.c | 64 +++++++++++++++++++++++++++++++++++++++++++++
  1627. 1 file changed, 64 insertions(+)
  1628. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  1629. index 0996339df429b..a9a105092e7c3 100644
  1630. --- a/kernel/sched/fair.c
  1631. +++ b/kernel/sched/fair.c
  1632. @@ -9269,6 +9269,37 @@ static bool sched_asym_ipcc_pick(struct sched_group *a,
  1633. return sched_asym_ipcc_prefer(a_stats, b_stats);
  1634. }
  1635. +/**
  1636. + * ipcc_score_delta - Get the IPCC score delta wrt the load balance's dst_cpu
  1637. + * @p: A task
  1638. + * @env: Load balancing environment
  1639. + *
  1640. + * Returns: The IPCC score delta that @p would get if placed in the destination
  1641. + * CPU of @env. LONG_MIN to indicate that the delta should not be used.
  1642. + */
  1643. +static long ipcc_score_delta(struct task_struct *p, struct lb_env *env)
  1644. +{
  1645. + unsigned long score_src, score_dst;
  1646. + unsigned short ipcc = p->ipcc;
  1647. +
  1648. + if (!sched_ipcc_enabled())
  1649. + return LONG_MIN;
  1650. +
  1651. + /* Only asym_packing uses IPCC scores at the moment. */
  1652. + if (!(env->sd->flags & SD_ASYM_PACKING))
  1653. + return LONG_MIN;
  1654. +
  1655. + score_dst = arch_get_ipcc_score(ipcc, env->dst_cpu);
  1656. + if (IS_ERR_VALUE(score_dst))
  1657. + return LONG_MIN;
  1658. +
  1659. + score_src = arch_get_ipcc_score(ipcc, task_cpu(p));
  1660. + if (IS_ERR_VALUE(score_src))
  1661. + return LONG_MIN;
  1662. +
  1663. + return score_dst - score_src;
  1664. +}
  1665. +
  1666. #else /* CONFIG_IPC_CLASSES */
  1667. static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
  1668. struct rq *rq)
  1669. @@ -9299,6 +9330,11 @@ static bool sched_asym_ipcc_pick(struct sched_group *a,
  1670. return false;
  1671. }
  1672. +static long ipcc_score_delta(struct task_struct *p, struct lb_env *env)
  1673. +{
  1674. + return LONG_MIN;
  1675. +}
  1676. +
  1677. #endif /* CONFIG_IPC_CLASSES */
  1678. /**
  1679. @@ -10459,6 +10495,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
  1680. {
  1681. struct rq *busiest = NULL, *rq;
  1682. unsigned long busiest_util = 0, busiest_load = 0, busiest_capacity = 1;
  1683. + long busiest_ipcc_delta = LONG_MIN;
  1684. unsigned int busiest_nr = 0;
  1685. int i;
  1686. @@ -10575,8 +10612,35 @@ static struct rq *find_busiest_queue(struct lb_env *env,
  1687. case migrate_task:
  1688. if (busiest_nr < nr_running) {
  1689. + struct task_struct *curr;
  1690. +
  1691. busiest_nr = nr_running;
  1692. busiest = rq;
  1693. +
  1694. + /*
  1695. + * Remember the IPCC score delta of busiest::curr.
  1696. + * We may need it to break a tie with other queues
  1697. + * with equal nr_running.
  1698. + */
  1699. + curr = rcu_dereference(busiest->curr);
  1700. + busiest_ipcc_delta = ipcc_score_delta(curr, env);
  1701. + /*
  1702. + * If rq and busiest have the same number of running
  1703. + * tasks and IPC classes are supported, pick rq if doing
  1704. + * so would give rq::curr a bigger IPC boost on dst_cpu.
  1705. + */
  1706. + } else if (busiest_nr == nr_running) {
  1707. + struct task_struct *curr;
  1708. + long delta;
  1709. +
  1710. + curr = rcu_dereference(rq->curr);
  1711. + delta = ipcc_score_delta(curr, env);
  1712. +
  1713. + if (busiest_ipcc_delta < delta) {
  1714. + busiest_ipcc_delta = delta;
  1715. + busiest_nr = nr_running;
  1716. + busiest = rq;
  1717. + }
  1718. }
  1719. break;
  1720. --
  1721. 2.39.2
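The migrate_task tie-break above reduces to the following selection loop, sketched here in standalone C with invented nr_running counts and deltas.

#include <limits.h>
#include <stdio.h>

struct rq { int nr_running; long ipcc_delta; };

int main(void)
{
        struct rq rqs[] = {
                { .nr_running = 3, .ipcc_delta = 100 },
                { .nr_running = 3, .ipcc_delta = 250 }, /* same load, bigger boost */
                { .nr_running = 2, .ipcc_delta = 900 }, /* fewer tasks: never picked */
        };
        int busiest_nr = 0, busiest = -1;
        long busiest_delta = LONG_MIN;

        for (int i = 0; i < 3; i++) {
                if (rqs[i].nr_running > busiest_nr) {
                        busiest_nr = rqs[i].nr_running;
                        busiest_delta = rqs[i].ipcc_delta;
                        busiest = i;
                } else if (rqs[i].nr_running == busiest_nr &&
                           rqs[i].ipcc_delta > busiest_delta) {
                        busiest_delta = rqs[i].ipcc_delta;
                        busiest = i;
                }
        }
        printf("busiest rq: %d\n", busiest); /* 1: equal nr_running, higher delta */
        return 0;
}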
  1722. From fa944aa2c7b296272c55a201a3aa40a84f9737a5 Mon Sep 17 00:00:00 2001
  1723. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1724. Date: Mon, 6 Feb 2023 21:10:52 -0800
  1725. Subject: [PATCH] thermal: intel: hfi: Introduce Intel Thread Director classes
  1726. On Intel hybrid parts, each type of CPU has specific performance and
  1727. energy efficiency capabilities. The Intel Thread Director technology
  1728. extends the Hardware Feedback Interface (HFI) to provide performance and
  1729. energy efficiency data for advanced classes of instructions.
  1730. Add support to parse per-class capabilities.
  1731. Cc: Ben Segall <bsegall@google.com>
  1732. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  1733. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  1734. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  1735. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  1736. Cc: Len Brown <len.brown@intel.com>
  1737. Cc: Lukasz Luba <lukasz.luba@arm.com>
  1738. Cc: Mel Gorman <mgorman@suse.de>
  1739. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  1740. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  1741. Cc: Steven Rostedt <rostedt@goodmis.org>
  1742. Cc: Tim C. Chen <tim.c.chen@intel.com>
  1743. Cc: Valentin Schneider <vschneid@redhat.com>
  1744. Cc: x86@kernel.org
  1745. Cc: linux-pm@vger.kernel.org
  1746. Cc: linux-kernel@vger.kernel.org
  1747. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1748. Patchset: intel-thread-director
  1749. ---
  1750. drivers/thermal/intel/intel_hfi.c | 30 ++++++++++++++++++++++++------
  1751. 1 file changed, 24 insertions(+), 6 deletions(-)
  1752. diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
  1753. index 6e604bda2b939..2527ae3836c74 100644
  1754. --- a/drivers/thermal/intel/intel_hfi.c
  1755. +++ b/drivers/thermal/intel/intel_hfi.c
  1756. @@ -77,7 +77,7 @@ union cpuid6_edx {
  1757. * @ee_cap: Energy efficiency capability
  1758. *
  1759. * Capabilities of a logical processor in the HFI table. These capabilities are
  1760. - * unitless.
  1761. + * unitless and specific to each HFI class.
  1762. */
  1763. struct hfi_cpu_data {
  1764. u8 perf_cap;
  1765. @@ -89,7 +89,8 @@ struct hfi_cpu_data {
  1766. * @perf_updated: Hardware updated performance capabilities
  1767. * @ee_updated: Hardware updated energy efficiency capabilities
  1768. *
  1769. - * Properties of the data in an HFI table.
1770. + * Properties of the data in an HFI table. There is one header per
  1771. + * HFI class.
  1772. */
  1773. struct hfi_hdr {
  1774. u8 perf_updated;
  1775. @@ -127,16 +128,21 @@ struct hfi_instance {
  1776. /**
  1777. * struct hfi_features - Supported HFI features
  1778. + * @nr_classes: Number of classes supported
  1779. * @nr_table_pages: Size of the HFI table in 4KB pages
  1780. * @cpu_stride: Stride size to locate the capability data of a logical
  1781. * processor within the table (i.e., row stride)
  1782. + * @class_stride: Stride size to locate a class within the capability
  1783. + * data of a logical processor or the HFI table header
  1784. * @hdr_size: Size of the table header
  1785. *
  1786. * Parameters and supported features that are common to all HFI instances
  1787. */
  1788. struct hfi_features {
  1789. + unsigned int nr_classes;
  1790. size_t nr_table_pages;
  1791. unsigned int cpu_stride;
  1792. + unsigned int class_stride;
  1793. unsigned int hdr_size;
  1794. };
  1795. @@ -333,8 +339,8 @@ static void init_hfi_cpu_index(struct hfi_cpu_info *info)
  1796. }
  1797. /*
  1798. - * The format of the HFI table depends on the number of capabilities that the
  1799. - * hardware supports. Keep a data structure to navigate the table.
  1800. + * The format of the HFI table depends on the number of capabilities and classes
  1801. + * that the hardware supports. Keep a data structure to navigate the table.
  1802. */
  1803. static void init_hfi_instance(struct hfi_instance *hfi_instance)
  1804. {
  1805. @@ -515,18 +521,30 @@ static __init int hfi_parse_features(void)
  1806. /* The number of 4KB pages required by the table */
  1807. hfi_features.nr_table_pages = edx.split.table_pages + 1;
  1808. + /*
  1809. + * Capability fields of an HFI class are grouped together. Classes are
  1810. + * contiguous in memory. Hence, use the number of supported features to
  1811. + * locate a specific class.
  1812. + */
  1813. + hfi_features.class_stride = nr_capabilities;
  1814. +
  1815. + /* For now, use only one class of the HFI table */
  1816. + hfi_features.nr_classes = 1;
  1817. +
  1818. /*
  1819. * The header contains change indications for each supported feature.
  1820. * The size of the table header is rounded up to be a multiple of 8
  1821. * bytes.
  1822. */
  1823. - hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
  1824. + hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities *
  1825. + hfi_features.nr_classes, 8) * 8;
  1826. /*
  1827. * Data of each logical processor is also rounded up to be a multiple
  1828. * of 8 bytes.
  1829. */
  1830. - hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
  1831. + hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities *
  1832. + hfi_features.nr_classes, 8) * 8;
  1833. return 0;
  1834. }
  1835. --
  1836. 2.39.2
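A quick check of the table-layout arithmetic above, assuming two capabilities (performance and energy efficiency) and four classes; four is an assumption here, not something this patch enumerates.

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned int nr_capabilities = 2; /* perf_cap + ee_cap */
        unsigned int nr_classes = 4;      /* assumed class count */

        /* Same rounding the patch applies: the header and each per-CPU
         * row are padded to a multiple of 8 bytes. */
        unsigned int hdr_size = DIV_ROUND_UP(nr_capabilities * nr_classes, 8) * 8;
        unsigned int cpu_stride = DIV_ROUND_UP(nr_capabilities * nr_classes, 8) * 8;
        unsigned int class_stride = nr_capabilities;

        printf("hdr=%u stride=%u class_stride=%u\n",
               hdr_size, cpu_stride, class_stride); /* 8, 8, 2 */
        return 0;
}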
  1837. From 61b13cb56dcd43bfa7ef1a94ae93fb4f9d45b7dc Mon Sep 17 00:00:00 2001
  1838. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1839. Date: Mon, 6 Feb 2023 21:10:53 -0800
  1840. Subject: [PATCH] x86/cpufeatures: Add the Intel Thread Director feature
  1841. definitions
  1842. Intel Thread Director (ITD) provides hardware resources to classify
  1843. the current task. The classification reflects the type of instructions that
  1844. a task currently executes.
  1845. ITD extends the Hardware Feedback Interface table to provide performance
  1846. and energy efficiency capabilities for each of the supported classes of
  1847. tasks.
  1848. Cc: Ben Segall <bsegall@google.com>
  1849. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  1850. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  1851. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  1852. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  1853. Cc: Len Brown <len.brown@intel.com>
  1854. Cc: Lukasz Luba <lukasz.luba@arm.com>
  1855. Cc: Mel Gorman <mgorman@suse.de>
  1856. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  1857. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  1858. Cc: Steven Rostedt <rostedt@goodmis.org>
  1859. Cc: Tim C. Chen <tim.c.chen@intel.com>
  1860. Cc: Valentin Schneider <vschneid@redhat.com>
  1861. Cc: x86@kernel.org
  1862. Cc: linux-pm@vger.kernel.org
  1863. Cc: linux-kernel@vger.kernel.org
  1864. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1865. Patchset: intel-thread-director
  1866. ---
  1867. arch/x86/include/asm/cpufeatures.h | 1 +
  1868. arch/x86/include/asm/disabled-features.h | 8 +++++++-
  1869. arch/x86/kernel/cpu/cpuid-deps.c | 1 +
  1870. 3 files changed, 9 insertions(+), 1 deletion(-)
  1871. diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
  1872. index 8f39c46197b82..a2f2730737aeb 100644
  1873. --- a/arch/x86/include/asm/cpufeatures.h
  1874. +++ b/arch/x86/include/asm/cpufeatures.h
  1875. @@ -345,6 +345,7 @@
  1876. #define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
  1877. #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
  1878. #define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */
  1879. +#define X86_FEATURE_ITD (14*32+23) /* Intel Thread Director */
  1880. /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
  1881. #define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
  1882. diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
  1883. index c44b56f7ffba0..0edd9bef7f2ed 100644
  1884. --- a/arch/x86/include/asm/disabled-features.h
  1885. +++ b/arch/x86/include/asm/disabled-features.h
  1886. @@ -99,6 +99,12 @@
  1887. # define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31))
  1888. #endif
  1889. +#ifdef CONFIG_IPC_CLASSES
  1890. +# define DISABLE_ITD 0
  1891. +#else
  1892. +# define DISABLE_ITD (1 << (X86_FEATURE_ITD & 31))
  1893. +#endif
  1894. +
  1895. /*
  1896. * Make sure to add features to the correct mask
  1897. */
  1898. @@ -117,7 +123,7 @@
  1899. DISABLE_CALL_DEPTH_TRACKING)
  1900. #define DISABLED_MASK12 0
  1901. #define DISABLED_MASK13 0
  1902. -#define DISABLED_MASK14 0
  1903. +#define DISABLED_MASK14 (DISABLE_ITD)
  1904. #define DISABLED_MASK15 0
  1905. #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
  1906. DISABLE_ENQCMD)
  1907. diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
  1908. index d952211171292..277f157e067e5 100644
  1909. --- a/arch/x86/kernel/cpu/cpuid-deps.c
  1910. +++ b/arch/x86/kernel/cpu/cpuid-deps.c
  1911. @@ -79,6 +79,7 @@ static const struct cpuid_dep cpuid_deps[] = {
  1912. { X86_FEATURE_XFD, X86_FEATURE_XSAVES },
  1913. { X86_FEATURE_XFD, X86_FEATURE_XGETBV1 },
  1914. { X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
  1915. + { X86_FEATURE_ITD, X86_FEATURE_HFI },
  1916. {}
  1917. };
  1918. --
  1919. 2.39.2
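Why DISABLE_ITD lands in DISABLED_MASK14: the feature number encodes a (word, bit) pair, as this small sketch shows.

#include <stdio.h>

#define X86_FEATURE_ITD (14*32 + 23)

int main(void)
{
        printf("word %d, bit %d, mask 0x%08x\n",
               X86_FEATURE_ITD / 32, X86_FEATURE_ITD & 31,
               1u << (X86_FEATURE_ITD & 31)); /* word 14, bit 23, 0x00800000 */
        return 0;
}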
  1920. From b32f2ed414ebd4bef042aa2529acdefbad0352a2 Mon Sep 17 00:00:00 2001
  1921. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1922. Date: Mon, 6 Feb 2023 21:10:54 -0800
  1923. Subject: [PATCH] thermal: intel: hfi: Store per-CPU IPCC scores
  1924. The scheduler reads the IPCC scores when balancing load. These reads can
  1925. be quite frequent. Hardware can also update the HFI table frequently.
  1926. Concurrent access may cause a lot of lock contention. It gets worse as the
  1927. number of CPUs increases.
  1928. Instead, create separate per-CPU IPCC scores that the scheduler can read
  1929. without the HFI table lock.
  1930. Cc: Ben Segall <bsegall@google.com>
  1931. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  1932. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  1933. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  1934. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  1935. Cc: Len Brown <len.brown@intel.com>
  1936. Cc: Lukasz Luba <lukasz.luba@arm.com>
  1937. Cc: Mel Gorman <mgorman@suse.de>
  1938. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  1939. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  1940. Cc: Steven Rostedt <rostedt@goodmis.org>
  1941. Cc: Tim C. Chen <tim.c.chen@intel.com>
  1942. Cc: Valentin Schneider <vschneid@redhat.com>
  1943. Cc: x86@kernel.org
  1944. Cc: linux-pm@vger.kernel.org
  1945. Cc: linux-kernel@vger.kernel.org
  1946. Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
  1947. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  1948. Patchset: intel-thread-director
  1949. ---
  1950. drivers/thermal/intel/intel_hfi.c | 46 +++++++++++++++++++++++++++++++
  1951. 1 file changed, 46 insertions(+)
  1952. diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
  1953. index 2527ae3836c74..b06021828892c 100644
  1954. --- a/drivers/thermal/intel/intel_hfi.c
  1955. +++ b/drivers/thermal/intel/intel_hfi.c
  1956. @@ -29,6 +29,7 @@
  1957. #include <linux/kernel.h>
  1958. #include <linux/math.h>
  1959. #include <linux/mutex.h>
  1960. +#include <linux/percpu.h>
  1961. #include <linux/percpu-defs.h>
  1962. #include <linux/printk.h>
  1963. #include <linux/processor.h>
  1964. @@ -170,6 +171,43 @@ static struct workqueue_struct *hfi_updates_wq;
  1965. #define HFI_UPDATE_INTERVAL HZ
  1966. #define HFI_MAX_THERM_NOTIFY_COUNT 16
  1967. +#ifdef CONFIG_IPC_CLASSES
  1968. +static int __percpu *hfi_ipcc_scores;
  1969. +
  1970. +static int alloc_hfi_ipcc_scores(void)
  1971. +{
  1972. + if (!cpu_feature_enabled(X86_FEATURE_ITD))
  1973. + return 0;
  1974. +
  1975. + hfi_ipcc_scores = __alloc_percpu(sizeof(*hfi_ipcc_scores) *
  1976. + hfi_features.nr_classes,
  1977. + sizeof(*hfi_ipcc_scores));
  1978. +
  1979. + return !hfi_ipcc_scores;
  1980. +}
  1981. +
  1982. +static void set_hfi_ipcc_score(void *caps, int cpu)
  1983. +{
  1984. + int i, *hfi_class;
  1985. +
  1986. + if (!cpu_feature_enabled(X86_FEATURE_ITD))
  1987. + return;
  1988. +
  1989. + hfi_class = per_cpu_ptr(hfi_ipcc_scores, cpu);
  1990. +
  1991. + for (i = 0; i < hfi_features.nr_classes; i++) {
  1992. + struct hfi_cpu_data *class_caps;
  1993. +
  1994. + class_caps = caps + i * hfi_features.class_stride;
  1995. + WRITE_ONCE(hfi_class[i], class_caps->perf_cap);
  1996. + }
  1997. +}
  1998. +
  1999. +#else
  2000. +static int alloc_hfi_ipcc_scores(void) { return 0; }
  2001. +static void set_hfi_ipcc_score(void *caps, int cpu) { }
  2002. +#endif /* CONFIG_IPC_CLASSES */
  2003. +
  2004. static void get_hfi_caps(struct hfi_instance *hfi_instance,
  2005. struct thermal_genl_cpu_caps *cpu_caps)
  2006. {
  2007. @@ -192,6 +230,8 @@ static void get_hfi_caps(struct hfi_instance *hfi_instance,
  2008. cpu_caps[i].efficiency = caps->ee_cap << 2;
  2009. ++i;
  2010. +
  2011. + set_hfi_ipcc_score(caps, cpu);
  2012. }
  2013. raw_spin_unlock_irq(&hfi_instance->table_lock);
  2014. }
  2015. @@ -580,8 +620,14 @@ void __init intel_hfi_init(void)
  2016. if (!hfi_updates_wq)
  2017. goto err_nomem;
  2018. + if (alloc_hfi_ipcc_scores())
  2019. + goto err_ipcc;
  2020. +
  2021. return;
  2022. +err_ipcc:
  2023. + destroy_workqueue(hfi_updates_wq);
  2024. +
  2025. err_nomem:
  2026. for (j = 0; j < i; ++j) {
  2027. hfi_instance = &hfi_instances[j];
  2028. --
  2029. 2.39.2
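A userspace model of the lock-free score cache: plain arrays stand in for percpu storage, and the assignment below stands in for the WRITE_ONCE() the driver issues under the table lock, paired with READ_ONCE() on the scheduler side.

#include <stdio.h>

#define NR_CPUS    4
#define NR_CLASSES 4

static int ipcc_scores[NR_CPUS][NR_CLASSES];

/* Publish one CPU's per-class performance capabilities. */
static void publish(int cpu, const unsigned char *caps)
{
        for (int i = 0; i < NR_CLASSES; i++)
                ipcc_scores[cpu][i] = caps[i]; /* WRITE_ONCE() in the kernel */
}

int main(void)
{
        const unsigned char caps[NR_CLASSES] = { 200, 255, 180, 120 };

        publish(0, caps);
        printf("cpu0 class1 score: %d\n", ipcc_scores[0][1]); /* 255 */
        return 0;
}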
  2030. From 9b519ff89b08af84eb947598643a71fddcc6a263 Mon Sep 17 00:00:00 2001
  2031. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  2032. Date: Mon, 6 Feb 2023 21:10:55 -0800
  2033. Subject: [PATCH] thermal: intel: hfi: Update the IPC class of the current task
  2034. Use Intel Thread Director classification to update the IPC class of a
  2035. task. Implement the arch_update_ipcc() interface of the scheduler.
  2036. Cc: Ben Segall <bsegall@google.com>
  2037. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  2038. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  2039. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  2040. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  2041. Cc: Len Brown <len.brown@intel.com>
  2042. Cc: Lukasz Luba <lukasz.luba@arm.com>
  2043. Cc: Mel Gorman <mgorman@suse.de>
  2044. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  2045. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  2046. Cc: Steven Rostedt <rostedt@goodmis.org>
  2047. Cc: Tim C. Chen <tim.c.chen@intel.com>
  2048. Cc: Valentin Schneider <vschneid@redhat.com>
  2049. Cc: x86@kernel.org
  2050. Cc: linux-pm@vger.kernel.org
  2051. Cc: linux-kernel@vger.kernel.org
  2052. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  2053. Patchset: intel-thread-director
  2054. ---
  2055. arch/x86/include/asm/topology.h | 6 ++++++
  2056. drivers/thermal/intel/intel_hfi.c | 32 +++++++++++++++++++++++++++++++
  2057. 2 files changed, 38 insertions(+)
  2058. diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
  2059. index 458c891a82736..ffcdac3f398f0 100644
  2060. --- a/arch/x86/include/asm/topology.h
  2061. +++ b/arch/x86/include/asm/topology.h
  2062. @@ -227,4 +227,10 @@ void init_freq_invariance_cppc(void);
  2063. #define arch_init_invariance_cppc init_freq_invariance_cppc
  2064. #endif
  2065. +#if defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL)
  2066. +void intel_hfi_update_ipcc(struct task_struct *curr);
  2067. +
  2068. +#define arch_update_ipcc intel_hfi_update_ipcc
  2069. +#endif /* defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL) */
  2070. +
  2071. #endif /* _ASM_X86_TOPOLOGY_H */
  2072. diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
  2073. index b06021828892c..530dcf57e06e2 100644
  2074. --- a/drivers/thermal/intel/intel_hfi.c
  2075. +++ b/drivers/thermal/intel/intel_hfi.c
  2076. @@ -72,6 +72,17 @@ union cpuid6_edx {
  2077. u32 full;
  2078. };
  2079. +#ifdef CONFIG_IPC_CLASSES
  2080. +union hfi_thread_feedback_char_msr {
  2081. + struct {
  2082. + u64 classid : 8;
  2083. + u64 __reserved : 55;
  2084. + u64 valid : 1;
  2085. + } split;
  2086. + u64 full;
  2087. +};
  2088. +#endif
  2089. +
  2090. /**
  2091. * struct hfi_cpu_data - HFI capabilities per CPU
  2092. * @perf_cap: Performance capability
  2093. @@ -174,6 +185,27 @@ static struct workqueue_struct *hfi_updates_wq;
  2094. #ifdef CONFIG_IPC_CLASSES
  2095. static int __percpu *hfi_ipcc_scores;
  2096. +void intel_hfi_update_ipcc(struct task_struct *curr)
  2097. +{
  2098. + union hfi_thread_feedback_char_msr msr;
  2099. +
  2100. + /* We should not be here if ITD is not supported. */
  2101. + if (!cpu_feature_enabled(X86_FEATURE_ITD)) {
  2102. + pr_warn_once("task classification requested but not supported!");
  2103. + return;
  2104. + }
  2105. +
  2106. + rdmsrl(MSR_IA32_HW_FEEDBACK_CHAR, msr.full);
  2107. + if (!msr.split.valid)
  2108. + return;
  2109. +
  2110. + /*
  2111. + * 0 is a valid classification for Intel Thread Director. A scheduler
  2112. + * IPCC class of 0 means that the task is unclassified. Adjust.
  2113. + */
  2114. + curr->ipcc = msr.split.classid + 1;
  2115. +}
  2116. +
  2117. static int alloc_hfi_ipcc_scores(void)
  2118. {
  2119. if (!cpu_feature_enabled(X86_FEATURE_ITD))
  2120. --
  2121. 2.39.2
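The MSR decode can be exercised in isolation. Note that C bitfield layout is compiler-defined; this sketch assumes the little-endian, low-bits-first layout the driver relies on, with the valid flag in bit 63 and the class id in the low byte.

#include <stdint.h>
#include <stdio.h>

union hfi_thread_feedback_char_msr {
        struct {
                uint64_t classid : 8;
                uint64_t __reserved : 55;
                uint64_t valid : 1;
        } split;
        uint64_t full;
};

int main(void)
{
        union hfi_thread_feedback_char_msr msr;

        msr.full = (1ULL << 63) | 2; /* valid = 1, hardware classid = 2 */
        if (msr.split.valid)
                /* +1: scheduler class 0 means unclassified. */
                printf("scheduler ipcc = %u\n",
                       (unsigned)msr.split.classid + 1); /* 3 */
        return 0;
}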
  2122. From 4cd93c9b598e57aa752639a4d93240d54ca89f23 Mon Sep 17 00:00:00 2001
  2123. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  2124. Date: Mon, 6 Feb 2023 21:10:56 -0800
  2125. Subject: [PATCH] thermal: intel: hfi: Report the IPC class score of a CPU
  2126. Implement the arch_get_ipcc_score() interface of the scheduler. Use the
  2127. performance capabilities of the extended Hardware Feedback Interface table
  2128. as the IPC score.
  2129. Cc: Ben Segall <bsegall@google.com>
  2130. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  2131. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  2132. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  2133. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  2134. Cc: Len Brown <len.brown@intel.com>
  2135. Cc: Lukasz Luba <lukasz.luba@arm.com>
  2136. Cc: Mel Gorman <mgorman@suse.de>
  2137. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  2138. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  2139. Cc: Steven Rostedt <rostedt@goodmis.org>
  2140. Cc: Tim C. Chen <tim.c.chen@intel.com>
  2141. Cc: Valentin Schneider <vschneid@redhat.com>
  2142. Cc: x86@kernel.org
  2143. Cc: linux-pm@vger.kernel.org
  2144. Cc: linux-kernel@vger.kernel.org
  2145. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  2146. Patchset: intel-thread-director
  2147. ---
  2148. arch/x86/include/asm/topology.h | 2 ++
  2149. drivers/thermal/intel/intel_hfi.c | 27 +++++++++++++++++++++++++++
  2150. 2 files changed, 29 insertions(+)
  2151. diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
  2152. index ffcdac3f398f0..c4fcd9c3c634f 100644
  2153. --- a/arch/x86/include/asm/topology.h
  2154. +++ b/arch/x86/include/asm/topology.h
  2155. @@ -229,8 +229,10 @@ void init_freq_invariance_cppc(void);
  2156. #if defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL)
  2157. void intel_hfi_update_ipcc(struct task_struct *curr);
  2158. +unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu);
  2159. #define arch_update_ipcc intel_hfi_update_ipcc
  2160. +#define arch_get_ipcc_score intel_hfi_get_ipcc_score
  2161. #endif /* defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL) */
  2162. #endif /* _ASM_X86_TOPOLOGY_H */
  2163. diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
  2164. index 530dcf57e06e2..fa9b4a678d926 100644
  2165. --- a/drivers/thermal/intel/intel_hfi.c
  2166. +++ b/drivers/thermal/intel/intel_hfi.c
  2167. @@ -206,6 +206,33 @@ void intel_hfi_update_ipcc(struct task_struct *curr)
  2168. curr->ipcc = msr.split.classid + 1;
  2169. }
  2170. +unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
  2171. +{
  2172. + unsigned short hfi_class;
  2173. + int *scores;
  2174. +
  2175. + if (cpu < 0 || cpu >= nr_cpu_ids)
  2176. + return -EINVAL;
  2177. +
  2178. + if (ipcc == IPC_CLASS_UNCLASSIFIED)
  2179. + return -EINVAL;
  2180. +
  2181. + /*
  2182. + * Scheduler IPC classes start at 1. HFI classes start at 0.
2183. + * See the note in intel_hfi_update_ipcc().
  2184. + */
  2185. + hfi_class = ipcc - 1;
  2186. +
  2187. + if (hfi_class >= hfi_features.nr_classes)
  2188. + return -EINVAL;
  2189. +
  2190. + scores = per_cpu_ptr(hfi_ipcc_scores, cpu);
  2191. + if (!scores)
  2192. + return -ENODEV;
  2193. +
  2194. + return READ_ONCE(scores[hfi_class]);
  2195. +}
  2196. +
  2197. static int alloc_hfi_ipcc_scores(void)
  2198. {
  2199. if (!cpu_feature_enabled(X86_FEATURE_ITD))
  2200. --
  2201. 2.39.2
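A standalone model of the class translation and bounds checks in intel_hfi_get_ipcc_score(). Scores are invented, and -22 stands in for -EINVAL.

#include <stdio.h>

#define IPC_CLASS_UNCLASSIFIED 0

static long lookup(unsigned short ipcc, unsigned int nr_classes,
                   const int *scores)
{
        unsigned short hfi_class;

        if (ipcc == IPC_CLASS_UNCLASSIFIED)
                return -22;

        hfi_class = ipcc - 1; /* scheduler classes start at 1, HFI at 0 */
        if (hfi_class >= nr_classes)
                return -22;

        return scores[hfi_class];
}

int main(void)
{
        int scores[4] = { 200, 255, 180, 120 };

        /* Valid class 2 hits HFI class 1; class 9 is out of range. */
        printf("%ld %ld\n", lookup(2, 4, scores), lookup(9, 4, scores));
        return 0; /* prints: 255 -22 */
}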
  2202. From 6452cc53bb25d5f4716f2e59ae3900452315b9be Mon Sep 17 00:00:00 2001
  2203. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  2204. Date: Mon, 6 Feb 2023 21:10:57 -0800
  2205. Subject: [PATCH] thermal: intel: hfi: Define a default class for unclassified
  2206. tasks
2207. A task may be unclassified if it has been recently created, spends most of
2208. its lifetime sleeping, or hardware has not provided a classification.
2209. Most tasks will eventually be classified as the scheduler's IPC class 1
2210. (HFI class 0). This class corresponds to the capabilities in the legacy,
  2211. classless, HFI table.
  2212. IPC class 1 is a reasonable choice until hardware provides an actual
  2213. classification. Meanwhile, the scheduler will place classes of tasks with
  2214. higher IPC scores on higher-performance CPUs.
  2215. Cc: Ben Segall <bsegall@google.com>
  2216. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  2217. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  2218. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  2219. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  2220. Cc: Len Brown <len.brown@intel.com>
  2221. Cc: Lukasz Luba <lukasz.luba@arm.com>
  2222. Cc: Mel Gorman <mgorman@suse.de>
  2223. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  2224. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  2225. Cc: Steven Rostedt <rostedt@goodmis.org>
  2226. Cc: Tim C. Chen <tim.c.chen@intel.com>
  2227. Cc: Valentin Schneider <vschneid@redhat.com>
  2228. Cc: x86@kernel.org
  2229. Cc: linux-pm@vger.kernel.org
  2230. Cc: linux-kernel@vger.kernel.org
  2231. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  2232. Patchset: intel-thread-director
  2233. ---
  2234. drivers/thermal/intel/intel_hfi.c | 15 ++++++++++++++-
  2235. 1 file changed, 14 insertions(+), 1 deletion(-)
  2236. diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
  2237. index fa9b4a678d926..7ea6acce7107e 100644
  2238. --- a/drivers/thermal/intel/intel_hfi.c
  2239. +++ b/drivers/thermal/intel/intel_hfi.c
  2240. @@ -185,6 +185,19 @@ static struct workqueue_struct *hfi_updates_wq;
  2241. #ifdef CONFIG_IPC_CLASSES
  2242. static int __percpu *hfi_ipcc_scores;
  2243. +/*
2244. + * A task may be unclassified if it has been recently created, spends most of
2245. + * its lifetime sleeping, or hardware has not provided a classification.
2246. + *
2247. + * Most tasks will eventually be classified as the scheduler's IPC class 1
2248. + * (HFI class 0). Meanwhile, the scheduler will place classes of tasks with
2249. + * higher IPC scores on higher-performance CPUs.
  2250. + *
  2251. + * IPC class 1 is a reasonable choice. It matches the performance capability
  2252. + * of the legacy, classless, HFI table.
  2253. + */
  2254. +#define HFI_UNCLASSIFIED_DEFAULT 1
  2255. +
  2256. void intel_hfi_update_ipcc(struct task_struct *curr)
  2257. {
  2258. union hfi_thread_feedback_char_msr msr;
  2259. @@ -215,7 +228,7 @@ unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
  2260. return -EINVAL;
  2261. if (ipcc == IPC_CLASS_UNCLASSIFIED)
  2262. - return -EINVAL;
  2263. + ipcc = HFI_UNCLASSIFIED_DEFAULT;
  2264. /*
  2265. * Scheduler IPC classes start at 1. HFI classes start at 0.
  2266. --
  2267. 2.39.2
From 44126224fe2556862b2324fbff03fd627e195080 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:58 -0800
Subject: [PATCH] thermal: intel: hfi: Enable the Intel Thread Director

Enable Intel Thread Director from the CPU hotplug callback: globally from
CPU0 and then enable the thread-classification hardware in each logical
processor individually.

Also, initialize the number of classes supported.

Let the scheduler know that it can start using IPC classes.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@linux.intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 arch/x86/include/asm/msr-index.h  |  2 ++
 drivers/thermal/intel/intel_hfi.c | 40 +++++++++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 978a3e203cdbb..35ca36a7f8896 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1099,6 +1099,8 @@
 /* Hardware Feedback Interface */
 #define MSR_IA32_HW_FEEDBACK_PTR        0x17d0
 #define MSR_IA32_HW_FEEDBACK_CONFIG     0x17d1
+#define MSR_IA32_HW_FEEDBACK_THREAD_CONFIG 0x17d4
+#define MSR_IA32_HW_FEEDBACK_CHAR       0x17d2
 
 /* x2APIC locked status */
 #define MSR_IA32_XAPIC_DISABLE_STATUS   0xBD
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 7ea6acce7107e..35d947f475508 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -48,6 +48,8 @@
 /* Hardware Feedback Interface MSR configuration bits */
 #define HW_FEEDBACK_PTR_VALID_BIT               BIT(0)
 #define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT       BIT(0)
+#define HW_FEEDBACK_CONFIG_ITD_ENABLE_BIT       BIT(1)
+#define HW_FEEDBACK_THREAD_CONFIG_ENABLE_BIT    BIT(0)
 
 /* CPUID detection and enumeration definitions for HFI */
@@ -72,6 +74,15 @@ union cpuid6_edx {
         u32 full;
 };
 
+union cpuid6_ecx {
+        struct {
+                u32 dont_care0:8;
+                u32 nr_classes:8;
+                u32 dont_care1:16;
+        } split;
+        u32 full;
+};
+
 #ifdef CONFIG_IPC_CLASSES
 union hfi_thread_feedback_char_msr {
         struct {
@@ -506,6 +517,11 @@ void intel_hfi_online(unsigned int cpu)
         init_hfi_cpu_index(info);
 
+        if (cpu_feature_enabled(X86_FEATURE_ITD)) {
+                msr_val = HW_FEEDBACK_THREAD_CONFIG_ENABLE_BIT;
+                wrmsrl(MSR_IA32_HW_FEEDBACK_THREAD_CONFIG, msr_val);
+        }
+
         /*
          * Now check if the HFI instance of the package/die of @cpu has been
          * initialized (by checking its header). In such case, all we have to
@@ -561,8 +577,22 @@ void intel_hfi_online(unsigned int cpu)
          */
         rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
         msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
+
+        if (cpu_feature_enabled(X86_FEATURE_ITD))
+                msr_val |= HW_FEEDBACK_CONFIG_ITD_ENABLE_BIT;
+
         wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
 
+        /*
+         * We have all we need to support IPC classes. Task classification is
+         * now working.
+         *
+         * All class scores are zero until after the first HFI update. That is
+         * OK. The scheduler queries these scores at every load balance.
+         */
+        if (cpu_feature_enabled(X86_FEATURE_ITD))
+                sched_enable_ipc_classes();
+
 unlock:
         mutex_unlock(&hfi_instance_lock);
         return;
@@ -640,8 +670,14 @@ static __init int hfi_parse_features(void)
          */
         hfi_features.class_stride = nr_capabilities;
 
-        /* For now, use only one class of the HFI table */
-        hfi_features.nr_classes = 1;
+        if (cpu_feature_enabled(X86_FEATURE_ITD)) {
+                union cpuid6_ecx ecx;
+
+                ecx.full = cpuid_ecx(CPUID_HFI_LEAF);
+                hfi_features.nr_classes = ecx.split.nr_classes;
+        } else {
+                hfi_features.nr_classes = 1;
+        }
 
         /*
          * The header contains change indications for each supported feature.
--
2.39.2

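The nr_classes parsing above takes bits 15:8 of CPUID(6).ECX. The bitfield
union can be exercised on its own; this sketch hardcodes a sample register
value rather than executing the CPUID instruction, and assumes the
little-endian bit allocation that GCC/Clang use on x86.

#include <stdio.h>
#include <stdint.h>

union cpuid6_ecx {
        struct {
                uint32_t dont_care0:8;
                uint32_t nr_classes:8;   /* bits 15:8 - ITD class count */
                uint32_t dont_care1:16;
        } split;
        uint32_t full;
};

int main(void)
{
        union cpuid6_ecx ecx;

        ecx.full = 0x00000400;  /* example: bits 15:8 = 4 -> four classes */
        printf("nr_classes = %u\n", (unsigned)ecx.split.nr_classes);
        return 0;
}

The union-over-bitfields idiom mirrors how the driver already decodes
CPUID(6).EDX, so no shifting or masking is spelled out by hand.
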
From 734cc5407daf6d98ff6c89f79bf1f794635f7617 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:59 -0800
Subject: [PATCH] sched/task_struct: Add helpers for IPC classification

The unprocessed classification that hardware provides for a task may not
be usable by the scheduler: the classification may change too frequently or
architectures may want to consider extra factors. For instance, some
processors with Intel Thread Director need to consider the state of the SMT
siblings of a core.

Provide per-task helper variables that architectures can use to post-
process the classification that hardware provides.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@linux.intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 include/linux/sched.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4f96c3dd59d0b..582e14cf3f765 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1529,7 +1529,17 @@ struct task_struct {
          * A hardware-defined classification of task that reflects but is
          * not identical to the number of instructions per cycle.
          */
-        unsigned short                  ipcc;
+        unsigned int                    ipcc : 9;
+        /*
+         * A candidate classification that arch-specific implementations
+         * qualify for correctness.
+         */
+        unsigned int                    ipcc_tmp : 9;
+        /*
+         * Counter to filter out transient candidate classifications
+         * of a task.
+         */
+        unsigned int                    ipcc_cntr : 14;
 #endif
 
         /*
--
2.39.2

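The three helper fields are sized so they pack into a single 32-bit word
(9 + 9 + 14 = 32). A stand-alone sketch with a hypothetical mirror struct
checks the packing; bitfield layout is ABI-dependent, but GCC/Clang on
x86-64 yield 4 bytes here.

#include <stdio.h>
#include <stdint.h>

/* Hypothetical mirror of the three task_struct fields added above. */
struct ipcc_fields {
        uint32_t ipcc      : 9;   /* current classification */
        uint32_t ipcc_tmp  : 9;   /* candidate awaiting qualification */
        uint32_t ipcc_cntr : 14;  /* debounce counter */
};

int main(void)
{
        /* 9 + 9 + 14 = 32 bits: no growth beyond one word is expected. */
        printf("sizeof = %zu\n", sizeof(struct ipcc_fields)); /* 4 on x86-64 */
        return 0;
}

Nine bits per class field leaves room for the 8-bit hardware class plus the
+1 offset the scheduler applies, while 14 counter bits round the total out
to exactly one word.
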
From 41d3fb0009d226f33935191790774bec3460c3e1 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:11:00 -0800
Subject: [PATCH] sched/core: Initialize helpers of task classification

Just as tasks start life unclassified, initialize the classification
auxiliary variables.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@linux.intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 kernel/sched/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0ab39cc055c77..2a942fc3c3094 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4426,6 +4426,8 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
         p->se.vruntime = 0;
 #ifdef CONFIG_IPC_CLASSES
         p->ipcc = IPC_CLASS_UNCLASSIFIED;
+        p->ipcc_tmp = IPC_CLASS_UNCLASSIFIED;
+        p->ipcc_cntr = 0;
 #endif
 
         INIT_LIST_HEAD(&p->se.group_node);
--
2.39.2

From 4e8dc94941042de9905f32f1d8e1a49e8893d631 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:11:01 -0800
Subject: [PATCH] sched/fair: Introduce sched_smt_siblings_idle()

X86 needs to know the idle state of the SMT siblings of a CPU to improve
the accuracy of IPCC classification. X86 implements support for IPC classes
in the thermal HFI driver.

Rename is_core_idle() as sched_smt_siblings_idle() and make it available
outside the scheduler code.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@linux.intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 include/linux/sched.h |  2 ++
 kernel/sched/fair.c   | 21 +++++++++++++------
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 582e14cf3f765..f2adf662eda83 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2440,4 +2440,6 @@ static inline void sched_core_fork(struct task_struct *p) { }
 
 extern void sched_set_stop_task(int cpu, struct task_struct *stop);
 
+extern bool sched_smt_siblings_idle(int cpu);
+
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a9a105092e7c3..97c574d5fa575 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1064,7 +1064,14 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
-static inline bool is_core_idle(int cpu)
+/**
+ * sched_smt_siblings_idle - Check whether SMT siblings of a CPU are idle
+ * @cpu: The CPU to check
+ *
+ * Returns true if all the SMT siblings of @cpu are idle or @cpu does not have
+ * SMT siblings. The idle state of @cpu is not considered.
+ */
+bool sched_smt_siblings_idle(int cpu)
 {
 #ifdef CONFIG_SCHED_SMT
         int sibling;
@@ -1767,7 +1774,7 @@ static inline int numa_idle_core(int idle_core, int cpu)
          * Prefer cores instead of packing HT siblings
          * and triggering future load balancing.
          */
-        if (is_core_idle(cpu))
+        if (sched_smt_siblings_idle(cpu))
                 idle_core = cpu;
 
         return idle_core;
@@ -9388,7 +9395,8 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs
          * If the destination CPU has SMT siblings, env->idle != CPU_NOT_IDLE
          * is not sufficient. We need to make sure the whole core is idle.
          */
-        if (sds->local->flags & SD_SHARE_CPUCAPACITY && !is_core_idle(env->dst_cpu))
+        if (sds->local->flags & SD_SHARE_CPUCAPACITY &&
+            !sched_smt_siblings_idle(env->dst_cpu))
                 return false;
 
         /* Only do SMT checks if either local or candidate have SMT siblings. */
@@ -10557,7 +10565,8 @@ static struct rq *find_busiest_queue(struct lb_env *env,
                     sched_asym_prefer(i, env->dst_cpu) &&
                     nr_running == 1) {
                         if (env->sd->flags & SD_SHARE_CPUCAPACITY ||
-                            (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && is_core_idle(i)))
+                            (!(env->sd->flags & SD_SHARE_CPUCAPACITY) &&
+                             sched_smt_siblings_idle(i)))
                                 continue;
                 }
 
@@ -10686,7 +10695,7 @@ asym_active_balance(struct lb_env *env)
                  * busy sibling.
                  */
                 return sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
-                       !is_core_idle(env->src_cpu);
+                       !sched_smt_siblings_idle(env->src_cpu);
         }
 
         return false;
@@ -11433,7 +11442,7 @@ static void nohz_balancer_kick(struct rq *rq)
                  */
                 if (sd->flags & SD_SHARE_CPUCAPACITY ||
                     (!(sd->flags & SD_SHARE_CPUCAPACITY) &&
-                     is_core_idle(i))) {
+                     sched_smt_siblings_idle(i))) {
                         flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
                         goto unlock;
                 }
--
2.39.2

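sched_smt_siblings_idle() walks the SMT mask of @cpu and ignores @cpu's own
state. A rough user-space approximation over plain arrays; idle[] and
smt_mask[] are made-up stand-ins for idle_cpu() and cpu_smt_mask().

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

static bool idle[NR_CPUS] = { false, true, true, true };
/* Two-way SMT: CPUs 0/2 share a core, CPUs 1/3 share a core. */
static int smt_mask[NR_CPUS][2] = { {0, 2}, {1, 3}, {0, 2}, {1, 3} };

/* All SMT siblings of @cpu idle? @cpu's own state is not considered. */
static bool smt_siblings_idle(int cpu)
{
        for (int i = 0; i < 2; i++) {
                int sibling = smt_mask[cpu][i];

                if (sibling == cpu)
                        continue;
                if (!idle[sibling])
                        return false;
        }
        return true;
}

int main(void)
{
        printf("%d\n", smt_siblings_idle(0)); /* 1: sibling CPU2 is idle */
        printf("%d\n", smt_siblings_idle(2)); /* 0: sibling CPU0 is busy */
        return 0;
}

Skipping @cpu itself is what lets the caller ask "would this core be fully
idle apart from me?", which is exactly the question the HFI driver needs.
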
From 0552b24fd1c1d40cd5b4a32d07afae3f3136d6c2 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:11:02 -0800
Subject: [PATCH] thermal: intel: hfi: Implement model-specific checks for task
 classification

In Alder Lake and Raptor Lake, the result of thread classification is more
accurate when only one SMT sibling is busy. Classification results for
class 2 and 3 are always reliable.

To avoid unnecessary migrations, only update the class of a task if it has
been the same for 4 consecutive user ticks.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@linux.intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 drivers/thermal/intel/intel_hfi.c | 60 ++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 35d947f475508..fdb53e4cabc14 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -40,6 +40,7 @@
 #include <linux/workqueue.h>
 
 #include <asm/msr.h>
+#include <asm/intel-family.h>
 
 #include "../thermal_core.h"
 #include "intel_hfi.h"
@@ -209,9 +210,64 @@ static int __percpu *hfi_ipcc_scores;
  */
 #define HFI_UNCLASSIFIED_DEFAULT 1
 
+#define CLASS_DEBOUNCER_SKIPS 4
+
+/**
+ * debounce_and_update_class() - Process and update a task's classification
+ *
+ * @p: The task of which the classification will be updated
+ * @new_ipcc: The new IPC classification
+ *
+ * Update the classification of @p with the new value that hardware provides.
+ * Only update the classification of @p if it has been the same for
+ * CLASS_DEBOUNCER_SKIPS consecutive ticks.
+ */
+static void debounce_and_update_class(struct task_struct *p, u8 new_ipcc)
+{
+        u16 debounce_skip;
+
+        /* The class of @p changed. Only restart the debounce counter. */
+        if (p->ipcc_tmp != new_ipcc) {
+                p->ipcc_cntr = 1;
+                goto out;
+        }
+
+        /*
+         * The class of @p did not change. Update it if it has been the same
+         * for CLASS_DEBOUNCER_SKIPS user ticks.
+         */
+        debounce_skip = p->ipcc_cntr + 1;
+        if (debounce_skip < CLASS_DEBOUNCER_SKIPS)
+                p->ipcc_cntr++;
+        else
+                p->ipcc = new_ipcc;
+
+out:
+        p->ipcc_tmp = new_ipcc;
+}
+
+static bool classification_is_accurate(u8 hfi_class, bool smt_siblings_idle)
+{
+        switch (boot_cpu_data.x86_model) {
+        case INTEL_FAM6_ALDERLAKE:
+        case INTEL_FAM6_ALDERLAKE_L:
+        case INTEL_FAM6_RAPTORLAKE:
+        case INTEL_FAM6_RAPTORLAKE_P:
+        case INTEL_FAM6_RAPTORLAKE_S:
+                if (hfi_class == 3 || hfi_class == 2 || smt_siblings_idle)
+                        return true;
+
+                return false;
+
+        default:
+                return true;
+        }
+}
+
 void intel_hfi_update_ipcc(struct task_struct *curr)
 {
         union hfi_thread_feedback_char_msr msr;
+        bool idle;
 
         /* We should not be here if ITD is not supported. */
@@ -227,7 +283,9 @@ void intel_hfi_update_ipcc(struct task_struct *curr)
          * 0 is a valid classification for Intel Thread Director. A scheduler
          * IPCC class of 0 means that the task is unclassified. Adjust.
          */
-        curr->ipcc = msr.split.classid + 1;
+        idle = sched_smt_siblings_idle(task_cpu(curr));
+        if (classification_is_accurate(msr.split.classid, idle))
+                debounce_and_update_class(curr, msr.split.classid + 1);
 }
 
 unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
--
2.39.2

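The debounce above commits a new class only after CLASS_DEBOUNCER_SKIPS
identical consecutive observations. A stand-alone model of just that state
machine; struct task is a hypothetical stand-in for the task_struct fields
the patch uses.

#include <stdio.h>
#include <stdint.h>

#define CLASS_DEBOUNCER_SKIPS 4

struct task { uint16_t ipcc, ipcc_tmp, ipcc_cntr; };

static void debounce(struct task *p, uint8_t new_ipcc)
{
        if (p->ipcc_tmp != new_ipcc)
                p->ipcc_cntr = 1;                 /* changed: restart counting */
        else if (p->ipcc_cntr + 1 < CLASS_DEBOUNCER_SKIPS)
                p->ipcc_cntr++;                   /* stable, but not long enough */
        else
                p->ipcc = new_ipcc;               /* stable for 4 ticks: commit */

        p->ipcc_tmp = new_ipcc;                   /* remember the candidate */
}

int main(void)
{
        struct task t = { 0, 0, 0 };

        for (int tick = 0; tick < 5; tick++) {
                debounce(&t, 3);
                printf("tick %d: ipcc=%u cntr=%u\n", tick, t.ipcc, t.ipcc_cntr);
        }
        /* t.ipcc becomes 3 only on the 4th consecutive observation. */
        return 0;
}

Any change of candidate class resets the counter to 1, so a task bouncing
between classes never migrates on the strength of a transient sample.
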
From ea77b647f82ae1b9b57f60841b2aad7cb89bbc92 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:11:03 -0800
Subject: [PATCH] x86/cpufeatures: Add feature bit for HRESET

The HRESET instruction prevents the classification of the current task
from influencing the classification of the next task when running serially
on the same logical processor.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@linux.intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/include/asm/msr-index.h   | 4 +++-
 arch/x86/kernel/cpu/scattered.c    | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a2f2730737aeb..0a64e6bc67b13 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -307,6 +307,7 @@
 #define X86_FEATURE_SGX_EDECCSSA        (11*32+18) /* "" SGX EDECCSSA user leaf function */
 #define X86_FEATURE_CALL_DEPTH          (11*32+19) /* "" Call depth tracking for RSB stuffing */
 #define X86_FEATURE_MSR_TSX_CTRL        (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
+#define X86_FEATURE_HRESET              (11*32+23) /* Hardware history reset instruction */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI            (12*32+ 4) /* AVX VNNI instructions */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 35ca36a7f8896..4e6b1eddd7339 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1102,6 +1102,9 @@
 #define MSR_IA32_HW_FEEDBACK_THREAD_CONFIG 0x17d4
 #define MSR_IA32_HW_FEEDBACK_CHAR       0x17d2
 
+/* Hardware History Reset */
+#define MSR_IA32_HW_HRESET_ENABLE       0x17da
+
 /* x2APIC locked status */
 #define MSR_IA32_XAPIC_DISABLE_STATUS   0xBD
 #define LEGACY_XAPIC_DISABLED           BIT(0) /*
@@ -1109,5 +1112,4 @@
                                                 * disabling x2APIC will cause
                                                 * a #GP
                                                 */
-
 #endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index f53944fb8f7f9..66bc5713644dc 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
         { X86_FEATURE_EPB,              CPUID_ECX,  3, 0x00000006, 0 },
         { X86_FEATURE_INTEL_PPIN,       CPUID_EBX,  0, 0x00000007, 1 },
         { X86_FEATURE_RRSBA_CTRL,       CPUID_EDX,  2, 0x00000007, 2 },
+        { X86_FEATURE_HRESET,           CPUID_EAX, 22, 0x00000007, 1 },
         { X86_FEATURE_CQM_LLC,          CPUID_EDX,  1, 0x0000000f, 0 },
         { X86_FEATURE_CQM_OCCUP_LLC,    CPUID_EDX,  0, 0x0000000f, 1 },
         { X86_FEATURE_CQM_MBM_TOTAL,    CPUID_EDX,  1, 0x0000000f, 1 },
--
2.39.2

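Per the scattered.c entry above, HRESET is enumerated in CPUID leaf 0x7,
subleaf 1, EAX bit 22. The same bit can be read from user space; a small
sketch using the compiler-provided <cpuid.h> helpers (GCC and Clang both
ship __get_cpuid_count).

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

        /* Leaf 0x7, subleaf 1: structured extended feature flags. */
        if (!__get_cpuid_count(0x7, 0x1, &eax, &ebx, &ecx, &edx))
                return 1;  /* leaf not supported on this CPU */

        printf("HRESET supported: %u\n", (eax >> 22) & 1);
        return 0;
}

Printing 0 is the expected result on anything older than the hybrid parts
this series targets.
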
From 98f46411379b4192bc6070a38628c32e880854a8 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:11:04 -0800
Subject: [PATCH] x86/hreset: Configure history reset

Configure the MSR that controls the behavior of HRESET on each logical
processor.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@linux.intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 arch/x86/kernel/cpu/common.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6a25e93f2a87c..ae250426af286 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -412,6 +412,26 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
         cr4_clear_bits(X86_CR4_UMIP);
 }
 
+static u32 hardware_history_features __ro_after_init;
+
+static __always_inline void setup_hreset(struct cpuinfo_x86 *c)
+{
+        if (!cpu_feature_enabled(X86_FEATURE_HRESET))
+                return;
+
+        /*
+         * Use on all CPUs the hardware history features that the boot
+         * CPU supports.
+         */
+        if (c == &boot_cpu_data)
+                hardware_history_features = cpuid_ebx(0x20);
+
+        if (!hardware_history_features)
+                return;
+
+        wrmsrl(MSR_IA32_HW_HRESET_ENABLE, hardware_history_features);
+}
+
 /* These bits should not change their value after CPU init is finished. */
 static const unsigned long cr4_pinned_mask =
         X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
@@ -1849,10 +1869,11 @@ static void identify_cpu(struct cpuinfo_x86 *c)
         /* Disable the PN if appropriate */
         squash_the_stupid_serial_number(c);
 
-        /* Set up SMEP/SMAP/UMIP */
+        /* Set up SMEP/SMAP/UMIP/HRESET */
         setup_smep(c);
         setup_smap(c);
         setup_umip(c);
+        setup_hreset(c);
 
         /* Enable FSGSBASE instructions if available. */
         if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
--
2.39.2

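setup_hreset() latches CPUID(0x20).EBX once on the boot CPU and then
programs every CPU with that same mask. The gating reduces to the following
pattern; this is a stand-alone sketch in which wrmsrl() is a stub and the
MSR number is taken from the patch above.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static uint32_t hardware_history_features;      /* latched from the boot CPU */

static void wrmsrl(uint32_t msr, uint64_t val)  /* stub for illustration */
{
        printf("wrmsr 0x%x <- 0x%llx\n", msr, (unsigned long long)val);
}

static void setup_hreset(bool is_boot_cpu, uint32_t cpuid_0x20_ebx)
{
        if (is_boot_cpu)
                hardware_history_features = cpuid_0x20_ebx;

        if (!hardware_history_features)
                return;  /* nothing enumerated: leave the MSR alone */

        wrmsrl(0x17da /* MSR_IA32_HW_HRESET_ENABLE */, hardware_history_features);
}

int main(void)
{
        setup_hreset(true, 0x1);   /* boot CPU latches the mask and enables */
        setup_hreset(false, 0x3);  /* APs reuse the boot CPU's mask */
        return 0;
}

Using one mask for all CPUs keeps behavior uniform even if a late-arriving
CPU were to enumerate extra history features.
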
From 296067cf1027b437407e587a6cb2a0a7bdf6c503 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:11:05 -0800
Subject: [PATCH] x86/process: Reset hardware history in context switch

Reset the classification history of the current task when switching to the
next task. Hardware will start the classification of the next task from
scratch.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@linux.intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 arch/x86/include/asm/hreset.h | 31 +++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/common.c  |  7 +++++++
 arch/x86/kernel/process_32.c  |  3 +++
 arch/x86/kernel/process_64.c  |  3 +++
 4 files changed, 44 insertions(+)
 create mode 100644 arch/x86/include/asm/hreset.h

diff --git a/arch/x86/include/asm/hreset.h b/arch/x86/include/asm/hreset.h
new file mode 100644
index 0000000000000..d68ca2fb8642b
--- /dev/null
+++ b/arch/x86/include/asm/hreset.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_HRESET_H
+#define _ASM_X86_HRESET_H
+
+/**
+ * HRESET - History reset. Available since binutils v2.36.
+ *
+ * Request the processor to reset the history of task classification on the
+ * current logical processor. The history components to be reset are
+ * specified in %eax. Only bits specified in CPUID(0x20).EBX and enabled
+ * in the IA32_HRESET_ENABLE MSR can be selected.
+ *
+ * The assembly code looks like:
+ *
+ *      hreset %eax
+ *
+ * The corresponding machine code looks like:
+ *
+ *      F3 0F 3A F0 ModRM Imm
+ *
+ * The value of ModRM is 0xc0 to specify %eax register addressing.
+ * The ignored immediate operand is set to 0.
+ *
+ * The instruction is documented in the Intel SDM.
+ */
+
+#define __ASM_HRESET ".byte 0xf3, 0xf, 0x3a, 0xf0, 0xc0, 0x0"
+
+void reset_hardware_history(void);
+
+#endif /* _ASM_X86_HRESET_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ae250426af286..c5c835c2a6195 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -53,6 +53,7 @@
 #include <asm/mce.h>
 #include <asm/msr.h>
 #include <asm/cacheinfo.h>
+#include <asm/hreset.h>
 #include <asm/memtype.h>
 #include <asm/microcode.h>
 #include <asm/microcode_intel.h>
@@ -414,6 +415,12 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
 
 static u32 hardware_history_features __ro_after_init;
 
+void reset_hardware_history(void)
+{
+        asm_inline volatile (ALTERNATIVE("", __ASM_HRESET, X86_FEATURE_HRESET)
+                             : : "a" (hardware_history_features) : "memory");
+}
+
 static __always_inline void setup_hreset(struct cpuinfo_x86 *c)
 {
         if (!cpu_feature_enabled(X86_FEATURE_HRESET))
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 708c87b88cc15..7353bb119e79c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -52,6 +52,7 @@
 #include <asm/switch_to.h>
 #include <asm/vm86.h>
 #include <asm/resctrl.h>
+#include <asm/hreset.h>
 #include <asm/proto.h>
 
 #include "process.h"
@@ -214,6 +215,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         /* Load the Intel cache allocation PQR MSR. */
         resctrl_sched_in(next_p);
 
+        reset_hardware_history();
+
         return prev_p;
 }
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bb65a68b4b499..eb204809890d2 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -53,6 +53,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/vdso.h>
 #include <asm/resctrl.h>
+#include <asm/hreset.h>
 #include <asm/unistd.h>
 #include <asm/fsgsbase.h>
 #ifdef CONFIG_IA32_EMULATION
@@ -658,6 +659,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         /* Load the Intel cache allocation PQR MSR. */
         resctrl_sched_in(next_p);
 
+        reset_hardware_history();
+
         return prev_p;
 }
 
--
2.39.2
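
The __ASM_HRESET string above hand-assembles `hreset %eax` (F3 0F 3A F0 /r
ib, with ModRM 0xc0 and an ignored imm8 of 0) for toolchains older than
binutils 2.36. A sketch that only lays out those bytes, without executing
the instruction:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* F3 0F 3A F0 C0 00 == hreset %eax (ModRM 0xc0, imm8 0). */
        const uint8_t hreset_eax[] = { 0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00 };

        for (size_t i = 0; i < sizeof(hreset_eax); i++)
                printf("%02x ", (unsigned)hreset_eax[i]);
        printf("\n");
        return 0;
}

In the kernel the bytes are emitted through ALTERNATIVE(), so CPUs without
X86_FEATURE_HRESET execute a NOP-sized empty sequence in the context-switch
path instead.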