0015-intel-thread-director.patch 112 KB

  1. From 8cb6f5bf4f355b0169e3f60c559a5e2203c1a988 Mon Sep 17 00:00:00 2001
  2. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  3. Date: Tue, 18 Oct 2022 04:22:40 -0700
  4. Subject: [PATCH] thermal: intel: hfi: Improve the type of
  5. hfi_features::nr_table_pages
  6. A Coverity static code scan raised a potential overflow_before_widen
  7. warning when hfi_features::nr_table_pages is used as an argument to
  8. memcpy in intel_hfi_process_event().
  9. Even though the overflow can never happen (the maximum number of pages of
  10. the HFI table is 0x10 and 0x10 << PAGE_SHIFT = 0x10000), using size_t as
  11. the data type of hfi_features::nr_table_pages makes Coverity happy and
  12. matches the data type of the argument 'size' of memcpy().
  13. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  14. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  15. Patchset: intel-thread-director
  16. ---
  17. drivers/thermal/intel/intel_hfi.c | 2 +-
  18. 1 file changed, 1 insertion(+), 1 deletion(-)
  19. diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
  20. index a0640f762dc5d..239afe02e5182 100644
  21. --- a/drivers/thermal/intel/intel_hfi.c
  22. +++ b/drivers/thermal/intel/intel_hfi.c
  23. @@ -137,7 +137,7 @@ struct hfi_instance {
  24. * Parameters and supported features that are common to all HFI instances
  25. */
  26. struct hfi_features {
  27. - unsigned int nr_table_pages;
  28. + size_t nr_table_pages;
  29. unsigned int cpu_stride;
  30. unsigned int hdr_size;
  31. };
  32. --
  33. 2.39.2
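A minimal standalone C sketch of the warning class Coverity flags here; the
constants mirror the commit message, while the buffers and main() are invented
for illustration:

    #include <stdio.h>
    #include <string.h>

    #define PAGE_SHIFT 12   /* 4 KB pages */

    static char src[0x10000], dst[0x10000];

    int main(void)
    {
        unsigned int nr_table_pages = 0x10; /* max HFI table pages, per the message */

        /*
         * The shift is evaluated in 32-bit arithmetic and only widened to
         * size_t afterwards; Coverity reports overflow_before_widen even
         * though 0x10 << 12 = 0x10000 cannot overflow. Declaring the field
         * as size_t performs the shift in the wider type.
         */
        memcpy(dst, src, nr_table_pages << PAGE_SHIFT);
        printf("copied %u bytes\n", nr_table_pages << PAGE_SHIFT);
        return 0;
    }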
  34. From 8f590ad883a1330a99f0bbd18d76c4631d7fddbb Mon Sep 17 00:00:00 2001
  35. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  36. Date: Mon, 6 Feb 2023 20:58:29 -0800
  37. Subject: [PATCH] sched/fair: Generalize asym_packing logic for SMT cores
38. When doing asym_packing load balancing between cores, all we care about is that
  39. the destination core is fully idle (including SMT siblings, if any) and
  40. that the busiest candidate scheduling group has exactly one busy CPU. It is
  41. irrelevant whether the candidate busiest core is non-SMT, SMT2, SMT4, SMT8,
  42. etc.
  43. Do not handle the candidate busiest non-SMT vs SMT cases separately. Simply
  44. do the two checks described above. Let find_busiest_group() handle bigger
  45. imbalances in the number of idle CPUs.
  46. Cc: Ben Segall <bsegall@google.com>
  47. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  48. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  49. Cc: Len Brown <len.brown@intel.com>
  50. Cc: Mel Gorman <mgorman@suse.de>
  51. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  52. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  53. Cc: Steven Rostedt <rostedt@goodmis.org>
  54. Cc: Tim C. Chen <tim.c.chen@intel.com>
  55. Cc: Valentin Schneider <vschneid@redhat.com>
  56. Cc: x86@kernel.org
  57. Cc: linux-kernel@vger.kernel.org
  58. Reviewed-by: Len Brown <len.brown@intel.com>
  59. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  60. Tested-by: Zhang Rui <rui.zhang@intel.com>
  61. Patchset: intel-thread-director
  62. ---
  63. kernel/sched/fair.c | 41 ++++++++++++++---------------------------
  64. 1 file changed, 14 insertions(+), 27 deletions(-)
  65. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  66. index 2c3d0d49c80ea..8b5fc8e86addb 100644
  67. --- a/kernel/sched/fair.c
  68. +++ b/kernel/sched/fair.c
  69. @@ -9042,13 +9042,11 @@ group_type group_classify(unsigned int imbalance_pct,
  70. * the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
  71. * only if @dst_cpu has higher priority.
  72. *
  73. - * If both @dst_cpu and @sg have SMT siblings, and @sg has exactly one more
  74. - * busy CPU than @sds::local, let @dst_cpu pull tasks if it has higher priority.
  75. - * Bigger imbalances in the number of busy CPUs will be dealt with in
  76. - * update_sd_pick_busiest().
  77. - *
  78. - * If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings
  79. - * of @dst_cpu are idle and @sg has lower priority.
  80. + * If @dst_cpu has SMT siblings, check if there are no running tasks in
  81. + * @sds::local. In such case, decide based on the priority of @sg. Do it only
  82. + * if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
  83. + * imbalances in the number of busy CPUs will be dealt with in
  84. + * find_busiest_group().
  85. *
  86. * Return: true if @dst_cpu can pull tasks, false otherwise.
  87. */
  88. @@ -9057,12 +9055,10 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
  89. struct sched_group *sg)
  90. {
  91. #ifdef CONFIG_SCHED_SMT
  92. - bool local_is_smt, sg_is_smt;
  93. + bool local_is_smt;
  94. int sg_busy_cpus;
  95. local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
  96. - sg_is_smt = sg->flags & SD_SHARE_CPUCAPACITY;
  97. -
  98. sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
  99. if (!local_is_smt) {
  100. @@ -9083,25 +9079,16 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
  101. return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
  102. }
  103. - /* @dst_cpu has SMT siblings. */
  104. -
  105. - if (sg_is_smt) {
  106. - int local_busy_cpus = sds->local->group_weight -
  107. - sds->local_stat.idle_cpus;
  108. - int busy_cpus_delta = sg_busy_cpus - local_busy_cpus;
  109. -
  110. - if (busy_cpus_delta == 1)
  111. - return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
  112. -
  113. - return false;
  114. - }
  115. -
  116. /*
  117. - * @sg does not have SMT siblings. Ensure that @sds::local does not end
  118. - * up with more than one busy SMT sibling and only pull tasks if there
  119. - * are not busy CPUs (i.e., no CPU has running tasks).
  120. + * @dst_cpu has SMT siblings. Do asym_packing load balancing only if
  121. + * all its siblings are idle (moving tasks between physical cores in
  122. + * which some SMT siblings are busy results in the same throughput).
  123. + *
  124. + * If the difference in the number of busy CPUs is two or more, let
  125. + * find_busiest_group() take care of it. We only care if @sg has
  126. + * exactly one busy CPU. This covers SMT and non-SMT sched groups.
  127. */
  128. - if (!sds->local_stat.sum_nr_running)
  129. + if (sg_busy_cpus == 1 && !sds->local_stat.sum_nr_running)
  130. return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
  131. return false;
  132. --
  133. 2.39.2
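A minimal userspace sketch of the check this patch converges on, with
simplified stand-ins for the kernel's sg_lb_stats fields (can_pull() and its
parameters are invented; this is not the kernel code itself):

    #include <stdbool.h>

    struct group_stats {
        unsigned int group_weight;  /* CPUs in the candidate busiest group */
        unsigned int idle_cpus;     /* idle CPUs in that group */
    };

    /*
     * The destination core is assumed fully idle. Pull only if the candidate
     * group has exactly one busy CPU and loses the priority comparison;
     * whether it is non-SMT, SMT2, SMT4, etc. is irrelevant.
     */
    bool can_pull(const struct group_stats *sgs, bool dst_has_higher_prio,
                  unsigned int local_busy_cpus)
    {
        unsigned int sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;

        if (sg_busy_cpus == 1 && local_busy_cpus == 0)
            return dst_has_higher_prio;

        return false;   /* bigger imbalances: find_busiest_group() territory */
    }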
  134. From 5a3b5eb5f79e51634f5fd173c0949c5293c93566 Mon Sep 17 00:00:00 2001
  135. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  136. Date: Mon, 6 Feb 2023 20:58:30 -0800
  137. Subject: [PATCH] sched/fair: Move is_core_idle() out of CONFIG_NUMA
  138. asym_packing needs this function to determine whether an SMT core is a
  139. suitable destination for load balancing.
  140. Cc: Ben Segall <bsegall@google.com>
  141. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  142. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  143. Cc: Len Brown <len.brown@intel.com>
  144. Cc: Mel Gorman <mgorman@suse.de>
  145. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  146. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  147. Cc: Steven Rostedt <rostedt@goodmis.org>
  148. Cc: Tim C. Chen <tim.c.chen@intel.com>
  149. Cc: Valentin Schneider <vschneid@redhat.com>
  150. Cc: x86@kernel.org
  151. Cc: linux-kernel@vger.kernel.org
  152. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  153. Tested-by: Zhang Rui <rui.zhang@intel.com>
  154. Patchset: intel-thread-director
  155. ---
  156. kernel/sched/fair.c | 34 +++++++++++++++++-----------------
  157. 1 file changed, 17 insertions(+), 17 deletions(-)
  158. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  159. index 8b5fc8e86addb..98c64f1db20e0 100644
  160. --- a/kernel/sched/fair.c
  161. +++ b/kernel/sched/fair.c
  162. @@ -1049,6 +1049,23 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  163. * Scheduling class queueing methods:
  164. */
  165. +static inline bool is_core_idle(int cpu)
  166. +{
  167. +#ifdef CONFIG_SCHED_SMT
  168. + int sibling;
  169. +
  170. + for_each_cpu(sibling, cpu_smt_mask(cpu)) {
  171. + if (cpu == sibling)
  172. + continue;
  173. +
  174. + if (!idle_cpu(sibling))
  175. + return false;
  176. + }
  177. +#endif
  178. +
  179. + return true;
  180. +}
  181. +
  182. #ifdef CONFIG_NUMA
  183. #define NUMA_IMBALANCE_MIN 2
  184. @@ -1688,23 +1705,6 @@ struct numa_stats {
  185. int idle_cpu;
  186. };
  187. -static inline bool is_core_idle(int cpu)
  188. -{
  189. -#ifdef CONFIG_SCHED_SMT
  190. - int sibling;
  191. -
  192. - for_each_cpu(sibling, cpu_smt_mask(cpu)) {
  193. - if (cpu == sibling)
  194. - continue;
  195. -
  196. - if (!idle_cpu(sibling))
  197. - return false;
  198. - }
  199. -#endif
  200. -
  201. - return true;
  202. -}
  203. -
  204. struct task_numa_env {
  205. struct task_struct *p;
  206. --
  207. 2.39.2
  208. From d4ba60a8be784dc7ed866fb52ff94519eb9d1586 Mon Sep 17 00:00:00 2001
  209. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  210. Date: Mon, 6 Feb 2023 20:58:31 -0800
  211. Subject: [PATCH] sched/fair: Only do asym_packing load balancing from fully
  212. idle SMT cores
  213. When balancing load between cores, all the SMT siblings of the destination
214. CPU, if any, must be idle. Otherwise, pulling new tasks degrades the
215. throughput of the busy SMT siblings while the overall throughput of the
216. system remains the same.
217. When balancing load within an SMT core this consideration is not
218. relevant. Follow the priorities that hardware indicates.
  219. Using is_core_idle() renders checking !sds->local_stat.sum_nr_running
  220. redundant. Remove it.
  221. Cc: Ben Segall <bsegall@google.com>
  222. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  223. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  224. Cc: Len Brown <len.brown@intel.com>
  225. Cc: Mel Gorman <mgorman@suse.de>
  226. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  227. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  228. Cc: Steven Rostedt <rostedt@goodmis.org>
  229. Cc: Tim C. Chen <tim.c.chen@intel.com>
  230. Cc: Valentin Schneider <vschneid@redhat.com>
  231. Cc: x86@kernel.org
  232. Cc: linux-kernel@vger.kernel.org
  233. Suggested-by: Valentin Schneider <vschneid@redhat.com>
  234. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  235. Tested-by: Zhang Rui <rui.zhang@intel.com>
  236. Patchset: intel-thread-director
  237. ---
  238. kernel/sched/fair.c | 34 +++++++++++++++++++++++++---------
  239. 1 file changed, 25 insertions(+), 9 deletions(-)
  240. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  241. index 98c64f1db20e0..f74777fc78d7d 100644
  242. --- a/kernel/sched/fair.c
  243. +++ b/kernel/sched/fair.c
  244. @@ -9038,12 +9038,14 @@ group_type group_classify(unsigned int imbalance_pct,
  245. * Check the state of the SMT siblings of both @sds::local and @sg and decide
  246. * if @dst_cpu can pull tasks.
  247. *
  248. + * This function must be called only if all the SMT siblings of @dst_cpu are
  249. + * idle, if any.
  250. + *
  251. * If @dst_cpu does not have SMT siblings, it can pull tasks if two or more of
  252. * the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
  253. * only if @dst_cpu has higher priority.
  254. *
  255. - * If @dst_cpu has SMT siblings, check if there are no running tasks in
  256. - * @sds::local. In such case, decide based on the priority of @sg. Do it only
  257. + * If @dst_cpu has SMT siblings, decide based on the priority of @sg. Do it only
  258. * if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
  259. * imbalances in the number of busy CPUs will be dealt with in
  260. * find_busiest_group().
  261. @@ -9080,15 +9082,13 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
  262. }
  263. /*
  264. - * @dst_cpu has SMT siblings. Do asym_packing load balancing only if
  265. - * all its siblings are idle (moving tasks between physical cores in
  266. - * which some SMT siblings are busy results in the same throughput).
  267. + * @dst_cpu has SMT siblings and are also idle.
  268. *
  269. * If the difference in the number of busy CPUs is two or more, let
  270. * find_busiest_group() take care of it. We only care if @sg has
  271. * exactly one busy CPU. This covers SMT and non-SMT sched groups.
  272. */
  273. - if (sg_busy_cpus == 1 && !sds->local_stat.sum_nr_running)
  274. + if (sg_busy_cpus == 1)
  275. return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
  276. return false;
  277. @@ -9102,7 +9102,14 @@ static inline bool
  278. sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs,
  279. struct sched_group *group)
  280. {
  281. - /* Only do SMT checks if either local or candidate have SMT siblings */
  282. + /*
  283. + * If the destination CPU has SMT siblings, env->idle != CPU_NOT_IDLE
  284. + * is not sufficient. We need to make sure the whole core is idle.
  285. + */
  286. + if (sds->local->flags & SD_SHARE_CPUCAPACITY && !is_core_idle(env->dst_cpu))
  287. + return false;
  288. +
  289. + /* Only do SMT checks if either local or candidate have SMT siblings. */
  290. if ((sds->local->flags & SD_SHARE_CPUCAPACITY) ||
  291. (group->flags & SD_SHARE_CPUCAPACITY))
  292. return asym_smt_can_pull_tasks(env->dst_cpu, sds, sgs, group);
  293. @@ -11049,8 +11056,17 @@ static void nohz_balancer_kick(struct rq *rq)
  294. */
  295. for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
  296. if (sched_asym_prefer(i, cpu)) {
  297. - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
  298. - goto unlock;
  299. + /*
  300. + * Always do ASYM_PACKING balance in the SMT
  301. + * domain. In upper domains, the core must be
  302. + * fully idle.
  303. + */
  304. + if (sd->flags & SD_SHARE_CPUCAPACITY ||
  305. + (!(sd->flags & SD_SHARE_CPUCAPACITY) &&
  306. + is_core_idle(i))) {
  307. + flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
  308. + goto unlock;
  309. + }
  310. }
  311. }
  312. }
  313. --
  314. 2.39.2
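The condition added to nohz_balancer_kick() above has the shape
A || (!A && B), which reduces to A || B; a tiny sketch under that observation
(should_kick() and its parameters are invented):

    #include <stdbool.h>

    /* smt_domain: sd has SD_SHARE_CPUCAPACITY; core_idle: is_core_idle(i). */
    bool should_kick(bool smt_domain, bool core_idle)
    {
        /* Same truth table as: smt_domain || (!smt_domain && core_idle). */
        return smt_domain || core_idle;
    }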
  315. From 20641917bc6bad6f81bafe1bac213c1c3e70ed09 Mon Sep 17 00:00:00 2001
  316. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  317. Date: Mon, 6 Feb 2023 20:58:32 -0800
  318. Subject: [PATCH] sched/fair: Let low-priority cores help high-priority busy
  319. SMT cores
  320. Using asym_packing priorities within an SMT core is straightforward. Just
  321. follow the priorities that hardware indicates.
322. When balancing load from an SMT core, also consider the idle state of its
323. siblings. Priorities do not reflect that an SMT core divides its throughput
324. among all its busy siblings. They only make sense when exactly one sibling
  325. is busy.
  326. Indicate that active balance is needed if the destination CPU has lower
  327. priority than the source CPU but the latter has busy SMT siblings.
  328. Make find_busiest_queue() not skip higher-priority SMT cores with more than
329. one busy sibling.
  330. Cc: Ben Segall <bsegall@google.com>
  331. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  332. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  333. Cc: Len Brown <len.brown@intel.com>
  334. Cc: Mel Gorman <mgorman@suse.de>
  335. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  336. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  337. Cc: Steven Rostedt <rostedt@goodmis.org>
  338. Cc: Tim C. Chen <tim.c.chen@intel.com>
  339. Cc: Valentin Schneider <vschneid@redhat.com>
  340. Cc: x86@kernel.org
  341. Cc: linux-kernel@vger.kernel.org
  342. Suggested-by: Valentin Schneider <vschneid@redhat.com>
  343. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  344. Tested-by: Zhang Rui <rui.zhang@intel.com>
  345. Patchset: intel-thread-director
  346. ---
  347. kernel/sched/fair.c | 31 ++++++++++++++++++++++++++-----
  348. 1 file changed, 26 insertions(+), 5 deletions(-)
  349. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  350. index f74777fc78d7d..24183e3eb3d47 100644
  351. --- a/kernel/sched/fair.c
  352. +++ b/kernel/sched/fair.c
  353. @@ -10224,11 +10224,20 @@ static struct rq *find_busiest_queue(struct lb_env *env,
  354. nr_running == 1)
  355. continue;
  356. - /* Make sure we only pull tasks from a CPU of lower priority */
  357. + /*
  358. + * Make sure we only pull tasks from a CPU of lower priority
  359. + * when balancing between SMT siblings.
  360. + *
  361. + * If balancing between cores, let lower priority CPUs help
  362. + * SMT cores with more than one busy sibling.
  363. + */
  364. if ((env->sd->flags & SD_ASYM_PACKING) &&
  365. sched_asym_prefer(i, env->dst_cpu) &&
  366. - nr_running == 1)
  367. - continue;
  368. + nr_running == 1) {
  369. + if (env->sd->flags & SD_SHARE_CPUCAPACITY ||
  370. + (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && is_core_idle(i)))
  371. + continue;
  372. + }
  373. switch (env->migration_type) {
  374. case migrate_load:
  375. @@ -10318,8 +10327,20 @@ asym_active_balance(struct lb_env *env)
  376. * lower priority CPUs in order to pack all tasks in the
  377. * highest priority CPUs.
  378. */
  379. - return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
  380. - sched_asym_prefer(env->dst_cpu, env->src_cpu);
  381. + if (env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING)) {
  382. + /* Always obey priorities between SMT siblings. */
  383. + if (env->sd->flags & SD_SHARE_CPUCAPACITY)
  384. + return sched_asym_prefer(env->dst_cpu, env->src_cpu);
  385. +
  386. + /*
  387. + * A lower priority CPU can help an SMT core with more than one
  388. + * busy sibling.
  389. + */
  390. + return sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
  391. + !is_core_idle(env->src_cpu);
  392. + }
  393. +
  394. + return false;
  395. }
  396. static inline bool
  397. --
  398. 2.39.2
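A standalone sketch of the revised asym_active_balance() policy, with booleans
standing in for the kernel's env fields (the function and parameter names are
invented):

    #include <stdbool.h>

    bool asym_active_balance_sketch(bool dst_idle, bool asym_packing,
                                    bool smt_domain, bool dst_prefers_src,
                                    bool src_core_fully_idle)
    {
        if (dst_idle && asym_packing) {
            /* Between SMT siblings, hardware priorities always rule. */
            if (smt_domain)
                return dst_prefers_src;

            /*
             * Between cores, a lower-priority destination may still help
             * a source core that has more than one busy sibling.
             */
            return dst_prefers_src || !src_core_fully_idle;
        }

        return false;
    }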
  399. From c1e77e8eea55b2d16b80c6dde3bcb3cf7e232aa5 Mon Sep 17 00:00:00 2001
  400. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  401. Date: Mon, 6 Feb 2023 20:58:33 -0800
  402. Subject: [PATCH] sched/fair: Keep a fully_busy SMT sched group as busiest
  403. When comparing two fully_busy scheduling groups, keep the current busiest
404. group if it represents an SMT core. Tasks in such a scheduling group share
  405. CPU resources and need more help than tasks in a non-SMT fully_busy group.
  406. Cc: Ben Segall <bsegall@google.com>
  407. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  408. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  409. Cc: Len Brown <len.brown@intel.com>
  410. Cc: Mel Gorman <mgorman@suse.de>
  411. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  412. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  413. Cc: Steven Rostedt <rostedt@goodmis.org>
  414. Cc: Tim C. Chen <tim.c.chen@intel.com>
  415. Cc: Valentin Schneider <vschneid@redhat.com>
  416. Cc: x86@kernel.org
  417. Cc: linux-kernel@vger.kernel.org
  418. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  419. Tested-by: Zhang Rui <rui.zhang@intel.com>
  420. Patchset: intel-thread-director
  421. ---
  422. kernel/sched/fair.c | 16 ++++++++++++++--
  423. 1 file changed, 14 insertions(+), 2 deletions(-)
  424. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  425. index 24183e3eb3d47..30b0e8476d1c6 100644
  426. --- a/kernel/sched/fair.c
  427. +++ b/kernel/sched/fair.c
  428. @@ -9302,10 +9302,22 @@ static bool update_sd_pick_busiest(struct lb_env *env,
  429. * contention when accessing shared HW resources.
  430. *
  431. * XXX for now avg_load is not computed and always 0 so we
  432. - * select the 1st one.
  433. + * select the 1st one, except if @sg is composed of SMT
  434. + * siblings.
  435. */
  436. - if (sgs->avg_load <= busiest->avg_load)
  437. +
  438. + if (sgs->avg_load < busiest->avg_load)
  439. return false;
  440. +
  441. + if (sgs->avg_load == busiest->avg_load) {
  442. + /*
  443. + * SMT sched groups need more help than non-SMT groups.
  444. + * If @sg happens to also be SMT, either choice is good.
  445. + */
  446. + if (sds->busiest->flags & SD_SHARE_CPUCAPACITY)
  447. + return false;
  448. + }
  449. +
  450. break;
  451. case group_has_spare:
  452. --
  453. 2.39.2
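The fully_busy tie-break, reduced to a self-contained sketch (function and
parameter names invented; for this group type avg_load is currently always 0,
so the equality branch is the common case):

    #include <stdbool.h>

    bool pick_sg_as_busiest(unsigned long sg_avg_load,
                            unsigned long busiest_avg_load,
                            bool busiest_is_smt)
    {
        if (sg_avg_load < busiest_avg_load)
            return false;

        /* On a tie, keep an SMT busiest: its tasks contend for core resources. */
        if (sg_avg_load == busiest_avg_load && busiest_is_smt)
            return false;

        return true;
    }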
  454. From a6d2b260d711873add514001a4eca293ff40c860 Mon Sep 17 00:00:00 2001
  455. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  456. Date: Mon, 6 Feb 2023 20:58:34 -0800
  457. Subject: [PATCH] sched/fair: Use the prefer_sibling flag of the current sched
  458. domain
  459. SD_PREFER_SIBLING is set from the SMT scheduling domain up to the first
  460. non-NUMA domain (the exception is systems with SD_ASYM_CPUCAPACITY).
461. Above the SMT sched domain, all domains have a child. The SD_PREFER_SIBLING
462. flag is always honored, regardless of the scheduling domain at which the
  463. load balance takes place.
464. There are cases, however, in which the busiest CPU's sched domain has a
465. child but the destination CPU's does not. Consider, for instance, a non-SMT
  466. core (or an SMT core with only one online sibling) doing load balance with
  467. an SMT core at the MC level. SD_PREFER_SIBLING will not be honored. We are
  468. left with a fully busy SMT core and an idle non-SMT core.
  469. Avoid inconsistent behavior. Use the prefer_sibling behavior at the current
  470. scheduling domain, not its child.
  471. The NUMA sched domain does not have the SD_PREFER_SIBLING flag. Thus, we
  472. will not spread load among NUMA sched groups, as desired.
  473. Cc: Ben Segall <bsegall@google.com>
  474. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  475. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  476. Cc: Len Brown <len.brown@intel.com>
  477. Cc: Mel Gorman <mgorman@suse.de>
  478. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  479. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  480. Cc: Steven Rostedt <rostedt@goodmis.org>
  481. Cc: Tim C. Chen <tim.c.chen@intel.com>
  482. Cc: Valentin Schneider <vschneid@redhat.com>
  483. Cc: x86@kernel.org
  484. Cc: linux-kernel@vger.kernel.org
  485. Suggested-by: Valentin Schneider <vschneid@redhat.com>
  486. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  487. Tested-by: Zhang Rui <rui.zhang@intel.com>
  488. Patchset: intel-thread-director
  489. ---
  490. kernel/sched/fair.c | 10 +++++-----
  491. 1 file changed, 5 insertions(+), 5 deletions(-)
  492. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  493. index 30b0e8476d1c6..9e98cfcf1e48b 100644
  494. --- a/kernel/sched/fair.c
  495. +++ b/kernel/sched/fair.c
  496. @@ -9792,7 +9792,6 @@ static void update_idle_cpu_scan(struct lb_env *env,
  497. static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds)
  498. {
  499. - struct sched_domain *child = env->sd->child;
  500. struct sched_group *sg = env->sd->groups;
  501. struct sg_lb_stats *local = &sds->local_stat;
  502. struct sg_lb_stats tmp_sgs;
  503. @@ -9833,9 +9832,11 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
  504. sg = sg->next;
  505. } while (sg != env->sd->groups);
  506. - /* Tag domain that child domain prefers tasks go to siblings first */
  507. - sds->prefer_sibling = child && child->flags & SD_PREFER_SIBLING;
  508. -
  509. + /*
  510. + * Tag domain that @env::sd prefers to spread excess tasks among
  511. + * sibling sched groups.
  512. + */
  513. + sds->prefer_sibling = env->sd->flags & SD_PREFER_SIBLING;
  514. if (env->sd->flags & SD_NUMA)
  515. env->fbq_type = fbq_classify_group(&sds->busiest_stat);
  516. @@ -10134,7 +10135,6 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
  517. goto out_balanced;
  518. }
  519. - /* Try to move all excess tasks to child's sibling domain */
  520. if (sds.prefer_sibling && local->group_type == group_has_spare &&
  521. busiest->sum_nr_running > local->sum_nr_running + 1)
  522. goto force_balance;
  523. --
  524. 2.39.2
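The behavioral difference in one sketch, using a simplified stand-in for
struct sched_domain (all names invented for illustration):

    #include <stdbool.h>
    #include <stddef.h>

    #define SD_PREFER_SIBLING 0x1

    struct sd_sketch {
        unsigned int flags;
        struct sd_sketch *child;
    };

    /* Old rule: a destination whose domain has no child never prefers siblings. */
    bool prefer_sibling_old(const struct sd_sketch *sd)
    {
        return sd->child && (sd->child->flags & SD_PREFER_SIBLING);
    }

    /* New rule: honor the flag of the domain where the balance takes place. */
    bool prefer_sibling_new(const struct sd_sketch *sd)
    {
        return sd->flags & SD_PREFER_SIBLING;
    }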
  525. From 08ff98c145516a1013cc5cc4f1d9b0d36388ec90 Mon Sep 17 00:00:00 2001
  526. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  527. Date: Mon, 6 Feb 2023 20:58:35 -0800
  528. Subject: [PATCH] sched/fair: Do not even the number of busy CPUs via
  529. asym_packing
530. Now that find_busiest_group() triggers load balancing between a fully_busy
531. SMT2 core and an idle non-SMT core, it is no longer needed to force
  532. balancing via asym_packing. Use asym_packing only as intended: when there
533. is a high-priority CPU that is idle.
534. After this change, the same logic applies to SMT and non-SMT local groups.
  535. Simplify asym_smt_can_pull_tasks() accordingly.
  536. Cc: Ben Segall <bsegall@google.com>
  537. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  538. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  539. Cc: Len Brown <len.brown@intel.com>
  540. Cc: Mel Gorman <mgorman@suse.de>
  541. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  542. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  543. Cc: Steven Rostedt <rostedt@goodmis.org>
  544. Cc: Tim C. Chen <tim.c.chen@intel.com>
  545. Cc: Valentin Schneider <vschneid@redhat.com>
  546. Cc: x86@kernel.org
  547. Cc: linux-kernel@vger.kernel.org
  548. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  549. Tested-by: Zhang Rui <rui.zhang@intel.com>
  550. Patchset: intel-thread-director
  551. ---
  552. kernel/sched/fair.c | 37 +++++--------------------------------
  553. 1 file changed, 5 insertions(+), 32 deletions(-)
  554. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
  555. index 9e98cfcf1e48b..635e8b41a87c9 100644
  556. --- a/kernel/sched/fair.c
  557. +++ b/kernel/sched/fair.c
  558. @@ -9035,20 +9035,15 @@ group_type group_classify(unsigned int imbalance_pct,
  559. * @sgs: Load-balancing statistics of the candidate busiest group
  560. * @sg: The candidate busiest group
  561. *
  562. - * Check the state of the SMT siblings of both @sds::local and @sg and decide
  563. - * if @dst_cpu can pull tasks.
  564. + * Check the state of the SMT siblings of @sg and decide if @dst_cpu can pull
  565. + * tasks.
  566. *
  567. * This function must be called only if all the SMT siblings of @dst_cpu are
  568. * idle, if any.
  569. *
  570. - * If @dst_cpu does not have SMT siblings, it can pull tasks if two or more of
  571. - * the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
  572. - * only if @dst_cpu has higher priority.
  573. - *
  574. - * If @dst_cpu has SMT siblings, decide based on the priority of @sg. Do it only
  575. - * if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
  576. - * imbalances in the number of busy CPUs will be dealt with in
  577. - * find_busiest_group().
  578. + * @dst_cpu can pull tasks if @sg has exactly one busy CPU (i.e., one more than
  579. + * @sds::local) and has lower group priority than @sds::local. Bigger imbalances
  580. + * in the number of busy CPUs will be dealt with in find_busiest_group().
  581. *
  582. * Return: true if @dst_cpu can pull tasks, false otherwise.
  583. */
  584. @@ -9057,33 +9052,11 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
  585. struct sched_group *sg)
  586. {
  587. #ifdef CONFIG_SCHED_SMT
  588. - bool local_is_smt;
  589. int sg_busy_cpus;
  590. - local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
  591. sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
  592. - if (!local_is_smt) {
  593. - /*
  594. - * If we are here, @dst_cpu is idle and does not have SMT
  595. - * siblings. Pull tasks if candidate group has two or more
  596. - * busy CPUs.
  597. - */
  598. - if (sg_busy_cpus >= 2) /* implies sg_is_smt */
  599. - return true;
  600. -
  601. - /*
  602. - * @dst_cpu does not have SMT siblings. @sg may have SMT
  603. - * siblings and only one is busy. In such case, @dst_cpu
  604. - * can help if it has higher priority and is idle (i.e.,
  605. - * it has no running tasks).
  606. - */
  607. - return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
  608. - }
  609. -
  610. /*
  611. - * @dst_cpu has SMT siblings and are also idle.
  612. - *
  613. * If the difference in the number of busy CPUs is two or more, let
  614. * find_busiest_group() take care of it. We only care if @sg has
  615. * exactly one busy CPU. This covers SMT and non-SMT sched groups.
  616. --
  617. 2.39.2
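After this simplification, the remaining check fits in a few lines; a sketch
with invented stand-in parameters:

    #include <stdbool.h>

    /* dst_prefers: sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu). */
    bool can_pull_final(unsigned int group_weight, unsigned int idle_cpus,
                        bool dst_prefers)
    {
        /* Whether the local group is SMT no longer matters. */
        return (group_weight - idle_cpus) == 1 && dst_prefers;
    }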
  618. From f70e8d703a3703327fea540de41d6b4df922a85d Mon Sep 17 00:00:00 2001
  619. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  620. Date: Mon, 6 Feb 2023 20:58:36 -0800
  621. Subject: [PATCH] sched/topology: Remove SHARED_CHILD from ASYM_PACKING
  622. Only x86 and Power7 use ASYM_PACKING. They use it differently.
  623. Power7 has cores of equal priority, but the SMT siblings of a core have
  624. different priorities. Parent scheduling domains do not need (nor have) the
  625. ASYM_PACKING flag. SHARED_CHILD is not needed. Using SHARED_PARENT would
  626. cause the topology debug code to complain.
  627. X86 has cores of different priority, but all the SMT siblings of the core
  628. have equal priority. It needs ASYM_PACKING at the MC level, but not at the
  629. SMT level (it also needs it at upper levels if they have scheduling groups
  630. of different priority). Removing ASYM_PACKING from the SMT domain causes
  631. the topology debug code to complain.
  632. Remove SHARED_CHILD for now. We still need a topology check that satisfies
  633. both architectures.
  634. Cc: Ben Segall <bsegall@google.com>
  635. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  636. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  637. Cc: Len Brown <len.brown@intel.com>
  638. Cc: Mel Gorman <mgorman@suse.de>
  639. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  640. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  641. Cc: Steven Rostedt <rostedt@goodmis.org>
  642. Cc: Tim C. Chen <tim.c.chen@intel.com>
  643. Cc: Valentin Schneider <vschneid@redhat.com>
  644. Cc: x86@kernel.org
  645. Cc: linux-kernel@vger.kernel.org
  646. Suggested-by: Valentin Schneider <vschneid@redhat.com>
  647. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  648. Tested-by: Zhang Rui <rui.zhang@intel.com>
  649. Patchset: intel-thread-director
  650. ---
  651. include/linux/sched/sd_flags.h | 5 +----
  652. 1 file changed, 1 insertion(+), 4 deletions(-)
  653. diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
  654. index 57bde66d95f7a..800238854ba54 100644
  655. --- a/include/linux/sched/sd_flags.h
  656. +++ b/include/linux/sched/sd_flags.h
  657. @@ -132,12 +132,9 @@ SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
  658. /*
  659. * Place busy tasks earlier in the domain
  660. *
  661. - * SHARED_CHILD: Usually set on the SMT level. Technically could be set further
  662. - * up, but currently assumed to be set from the base domain
  663. - * upwards (see update_top_cache_domain()).
  664. * NEEDS_GROUPS: Load balancing flag.
  665. */
  666. -SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
  667. +SD_FLAG(SD_ASYM_PACKING, SDF_NEEDS_GROUPS)
  668. /*
  669. * Prefer to place tasks in a sibling domain
  670. --
  671. 2.39.2
  672. From fa1585eecfadd43f866f50951bbe106c09e1f79f Mon Sep 17 00:00:00 2001
  673. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  674. Date: Mon, 6 Feb 2023 20:58:37 -0800
  675. Subject: [PATCH] x86/sched: Remove SD_ASYM_PACKING from the SMT domain flags
  676. There is no difference between any of the SMT siblings of a physical core.
  677. Do not do asym_packing load balancing at this level.
  678. Cc: Ben Segall <bsegall@google.com>
  679. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  680. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  681. Cc: Len Brown <len.brown@intel.com>
  682. Cc: Mel Gorman <mgorman@suse.de>
  683. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  684. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  685. Cc: Steven Rostedt <rostedt@goodmis.org>
  686. Cc: Tim C. Chen <tim.c.chen@intel.com>
  687. Cc: Valentin Schneider <vschneid@redhat.com>
  688. Cc: x86@kernel.org
  689. Cc: linux-kernel@vger.kernel.org
  690. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  691. Tested-by: Zhang Rui <rui.zhang@intel.com>
  692. Patchset: intel-thread-director
  693. ---
  694. arch/x86/kernel/smpboot.c | 2 +-
  695. 1 file changed, 1 insertion(+), 1 deletion(-)
  696. diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
  697. index 3f3ea0287f694..c3de98224cb4f 100644
  698. --- a/arch/x86/kernel/smpboot.c
  699. +++ b/arch/x86/kernel/smpboot.c
  700. @@ -545,7 +545,7 @@ static int x86_core_flags(void)
  701. #ifdef CONFIG_SCHED_SMT
  702. static int x86_smt_flags(void)
  703. {
  704. - return cpu_smt_flags() | x86_sched_itmt_flags();
  705. + return cpu_smt_flags();
  706. }
  707. #endif
  708. #ifdef CONFIG_SCHED_CLUSTER
  709. --
  710. 2.39.2
  711. From 5f0150f15f343e543bc257bca1db30f3481ed474 Mon Sep 17 00:00:00 2001
  712. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  713. Date: Mon, 6 Feb 2023 20:58:38 -0800
  714. Subject: [PATCH] x86/sched/itmt: Give all SMT siblings of a core the same
  715. priority
  716. X86 does not have the SD_ASYM_PACKING flag in the SMT domain. The scheduler
  717. knows how to handle SMT and non-SMT cores of different priority. There is
  718. no reason for SMT siblings of a core to have different priorities.
  719. Cc: Ben Segall <bsegall@google.com>
  720. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  721. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  722. Cc: Len Brown <len.brown@intel.com>
  723. Cc: Mel Gorman <mgorman@suse.de>
  724. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  725. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  726. Cc: Steven Rostedt <rostedt@goodmis.org>
  727. Cc: Tim C. Chen <tim.c.chen@intel.com>
  728. Cc: Valentin Schneider <vschneid@redhat.com>
  729. Cc: x86@kernel.org
  730. Cc: linux-kernel@vger.kernel.org
  731. Reviewed-by: Len Brown <len.brown@intel.com>
  732. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  733. Tested-by: Zhang Rui <rui.zhang@intel.com>
  734. Patchset: intel-thread-director
  735. ---
  736. arch/x86/kernel/itmt.c | 23 +++++------------------
  737. 1 file changed, 5 insertions(+), 18 deletions(-)
  738. diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
  739. index 9ff480e94511b..6510883c5e817 100644
  740. --- a/arch/x86/kernel/itmt.c
  741. +++ b/arch/x86/kernel/itmt.c
  742. @@ -174,32 +174,19 @@ int arch_asym_cpu_priority(int cpu)
  743. /**
  744. * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
  745. - * @prio: Priority of cpu core
  746. - * @core_cpu: The cpu number associated with the core
  747. + * @prio: Priority of @cpu
  748. + * @cpu: The CPU number
  749. *
  750. * The pstate driver will find out the max boost frequency
  751. * and call this function to set a priority proportional
  752. - * to the max boost frequency. CPU with higher boost
  753. + * to the max boost frequency. CPUs with higher boost
  754. * frequency will receive higher priority.
  755. *
  756. * No need to rebuild sched domain after updating
  757. * the CPU priorities. The sched domains have no
  758. * dependency on CPU priorities.
  759. */
  760. -void sched_set_itmt_core_prio(int prio, int core_cpu)
  761. +void sched_set_itmt_core_prio(int prio, int cpu)
  762. {
  763. - int cpu, i = 1;
  764. -
  765. - for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
  766. - int smt_prio;
  767. -
  768. - /*
  769. - * Ensure that the siblings are moved to the end
  770. - * of the priority chain and only used when
  771. - * all other high priority cpus are out of capacity.
  772. - */
  773. - smt_prio = prio * smp_num_siblings / (i * i);
  774. - per_cpu(sched_core_priority, cpu) = smt_prio;
  775. - i++;
  776. - }
  777. + per_cpu(sched_core_priority, cpu) = prio;
  778. }
  779. --
  780. 2.39.2
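A worked example of the formula this patch removes, using hypothetical inputs
prio = 100 and smp_num_siblings = 2:

    #include <stdio.h>

    int main(void)
    {
        int prio = 100, smp_num_siblings = 2;

        for (int i = 1; i <= smp_num_siblings; i++)
            printf("sibling %d: smt_prio = %d\n", i,
                   prio * smp_num_siblings / (i * i));

        /* Old scheme: 200 and 50. New scheme: every sibling keeps prio = 100. */
        return 0;
    }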
  781. From 59c32bf713f2d1c52fedb38ceeef07375736dbbc Mon Sep 17 00:00:00 2001
  782. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  783. Date: Mon, 6 Feb 2023 21:10:42 -0800
  784. Subject: [PATCH] sched/task_struct: Introduce IPC classes of tasks
  785. On hybrid processors, the architecture differences between the types of
  786. CPUs lead to different instructions-per-cycle (IPC) on each type of CPU.
  787. IPCs may differ further by the type of instructions. Instructions can be
  788. grouped into classes of similar IPCs.
  789. Hence, tasks can be classified into groups based on the type of
  790. instructions they execute.
791. Add a new member task_struct::ipcc to associate a particular task with
  792. an IPC class that depends on the instructions it executes.
  793. The scheduler may use the IPC class of a task and data about the
  794. performance among CPUs of a given IPC class to improve throughput. It
  795. may, for instance, place certain classes of tasks on CPUs of higher
  796. performance.
  797. The methods to determine the classification of a task and its relative
  798. IPC score are specific to each CPU architecture.
  799. Cc: Ben Segall <bsegall@google.com>
  800. Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
  801. Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
  802. Cc: Ionela Voinescu <ionela.voinescu@arm.com>
  803. Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
  804. Cc: Len Brown <len.brown@intel.com>
  805. Cc: Lukasz Luba <lukasz.luba@arm.com>
  806. Cc: Mel Gorman <mgorman@suse.de>
  807. Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  808. Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  809. Cc: Steven Rostedt <rostedt@goodmis.org>
  810. Cc: Tim C. Chen <tim.c.chen@intel.com>
  811. Cc: Valentin Schneider <vschneid@redhat.com>
  812. Cc: x86@kernel.org
  813. Cc: linux-pm@vger.kernel.org
  814. Cc: linux-kernel@vger.kernel.org
  815. Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  816. Patchset: intel-thread-director
  817. ---
  818. include/linux/sched.h | 10 ++++++++++
  819. init/Kconfig | 12 ++++++++++++
  820. 2 files changed, 22 insertions(+)
  821. diff --git a/include/linux/sched.h b/include/linux/sched.h
  822. index ffb6eb55cd135..ca0c32bf796fb 100644
  823. --- a/include/linux/sched.h
  824. +++ b/include/linux/sched.h
  825. @@ -127,6 +127,8 @@ struct task_group;
  826. __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
  827. TASK_PARKED)
  828. +#define IPC_CLASS_UNCLASSIFIED 0
  829. +
  830. #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING)
  831. #define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0)
  832. @@ -1528,6 +1530,14 @@ struct task_struct {
  833. union rv_task_monitor rv[RV_PER_TASK_MONITORS];
  834. #endif
  835. +#ifdef CONFIG_IPC_CLASSES
  836. + /*
  837. + * A hardware-defined classification of task that reflects but is
  838. + * not identical to the number of instructions per cycle.
  839. + */
  840. + unsigned short ipcc;
  841. +#endif
  842. +
  843. /*
  844. * New fields for task_struct should be added above here, so that
  845. * they are included in the randomized portion of task_struct.
  846. diff --git a/init/Kconfig b/init/Kconfig
  847. index 0c214af99085d..0ddda55fde6a6 100644
  848. --- a/init/Kconfig
  849. +++ b/init/Kconfig
  850. @@ -865,6 +865,18 @@ config UCLAMP_BUCKETS_COUNT
  851. If in doubt, use the default value.
  852. +config IPC_CLASSES
  853. + bool "IPC classes of tasks"
  854. + depends on SMP
  855. + help
  856. + If selected, each task is assigned a classification value that
  857. + reflects the type of instructions that the task executes. This
  858. + classification reflects but is not equal to the number of
  859. + instructions retired per cycle.
  860. +
  861. + The scheduler uses the classification value to improve the placement
  862. + of tasks.
  863. +
  864. endmenu
  865. #
  866. --
  867. 2.39.2
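A minimal sketch of how a consumer might test the new field; only the ipcc
member and IPC_CLASS_UNCLASSIFIED come from the patch, the rest is invented
for illustration:

    #include <stdbool.h>

    #define IPC_CLASS_UNCLASSIFIED 0

    struct task_sketch {
        unsigned short ipcc;    /* hardware-assigned IPC class */
    };

    bool task_is_classified(const struct task_sketch *p)
    {
        return p->ipcc != IPC_CLASS_UNCLASSIFIED;
    }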
  868. From c4738d1d773b0e4066f30875c6393adb77d55837 Mon Sep 17 00:00:00 2001
  869. From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
  870. Date: Mon, 6 Feb 2023 21:10:43 -0800
  871. Subject: [PATCH] sched: Add interfaces for IPC classes
  872. Add the interfaces that architectures shall implement to convey the data
  873. to support IPC classes.
  874. arch_update_ipcc() updates the IPC classification of the current task as
  875. given by hardware.
  876. arch_get_ipcc_score() provides a performance score for a given IPC class
when placed on a specific CPU. Higher scores indicate higher performance.

When a driver or equivalent enablement code has configured the necessary
hardware to support IPC classes, it should call sched_enable_ipc_classes()
to notify the scheduler that it can start using IPC classes data.

The number of classes and the score of each class of task are determined
by hardware.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 include/linux/sched/topology.h |  6 ++++
 kernel/sched/sched.h           | 66 ++++++++++++++++++++++++++++++++++
 kernel/sched/topology.c        |  9 +++++
 3 files changed, 81 insertions(+)

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 816df6cc444e1..5b084d3c9ad12 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -280,4 +280,10 @@ static inline int task_node(const struct task_struct *p)
 	return cpu_to_node(task_cpu(p));
 }
 
+#ifdef CONFIG_IPC_CLASSES
+extern void sched_enable_ipc_classes(void);
+#else
+static inline void sched_enable_ipc_classes(void) { }
+#endif
+
 #endif /* _LINUX_SCHED_TOPOLOGY_H */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d6d488e8eb554..a3b2b66e077d9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2511,6 +2511,72 @@ void arch_scale_freq_tick(void)
 }
 #endif
 
+#ifdef CONFIG_IPC_CLASSES
+DECLARE_STATIC_KEY_FALSE(sched_ipcc);
+
+static inline bool sched_ipcc_enabled(void)
+{
+	return static_branch_unlikely(&sched_ipcc);
+}
+
+#ifndef arch_update_ipcc
+/**
+ * arch_update_ipcc() - Update the IPC class of the current task
+ * @curr: The current task
+ *
+ * Request that the IPC classification of @curr is updated.
+ *
+ * Returns: none
+ */
+static __always_inline
+void arch_update_ipcc(struct task_struct *curr)
+{
+}
+#endif
+
+#ifndef arch_get_ipcc_score
+
+#define SCHED_IPCC_SCORE_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)
+/**
+ * arch_get_ipcc_score() - Get the IPC score of a class of task
+ * @ipcc: The IPC class
+ * @cpu: A CPU number
+ *
+ * The IPC performance score reflects (but is not identical to) the number
+ * of instructions retired per cycle for a given IPC class. It is a linear
+ * and abstract metric. Higher scores reflect better performance.
+ *
+ * The IPC score can be normalized with respect to the maximum IPC score of
+ * any class, i, on any CPU, c:
+ *
+ *              IPC(i, c)
+ *  ------------------------------------ * SCHED_IPCC_SCORE_SCALE
+ *       max(IPC(i, c) : (i, c))
+ *
+ * Scheduling schemes that want to use the IPC score along with other
+ * normalized metrics for scheduling (e.g., CPU capacity) may need to
+ * normalize it.
+ *
+ * Other scheduling schemes (e.g., asym_packing) do not need normalization.
+ *
+ * Returns the performance score of an IPC class, @ipcc, when running on @cpu.
+ * Error when either @ipcc or @cpu is invalid.
+ */
+static __always_inline
+unsigned long arch_get_ipcc_score(unsigned short ipcc, int cpu)
+{
+	return SCHED_IPCC_SCORE_SCALE;
+}
+#endif
+#else /* CONFIG_IPC_CLASSES */
+
+#define arch_get_ipcc_score(ipcc, cpu) (-EINVAL)
+#define arch_update_ipcc(curr)
+
+static inline bool sched_ipcc_enabled(void) { return false; }
+
+#endif /* CONFIG_IPC_CLASSES */
+
 #ifndef arch_scale_freq_capacity
 /**
  * arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 8739c2a5a54ea..60e03d15f58ca 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -670,6 +670,15 @@ DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
 DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
 DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
 
+#ifdef CONFIG_IPC_CLASSES
+DEFINE_STATIC_KEY_FALSE(sched_ipcc);
+
+void sched_enable_ipc_classes(void)
+{
+	static_branch_enable_cpuslocked(&sched_ipcc);
+}
+#endif
+
 static void update_top_cache_domain(int cpu)
 {
 	struct sched_domain_shared *sds = NULL;
--
2.39.2

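For context, here is a minimal sketch of how a driver might consume this interface once it has configured the classification hardware. It is an illustration only, not part of the patchset; the function name example_ipcc_init() is invented, and the CPU hotplug lock is taken explicitly because static_branch_enable_cpuslocked() expects to be called with it held.

#include <linux/cpu.h>
#include <linux/sched/topology.h>

/* Hypothetical enablement path: flips the sched_ipcc static key so that
 * sched_ipcc_enabled() starts returning true in the scheduler hot paths.
 */
static int __init example_ipcc_init(void)
{
	cpus_read_lock();	/* static_branch_enable_cpuslocked() needs this */
	sched_enable_ipc_classes();
	cpus_read_unlock();
	return 0;
}
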
From 021e8196d4ef87aa80fae8024b8055a6bca5b9c4 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:44 -0800
Subject: [PATCH] sched/core: Initialize the IPC class of a new task

New tasks shall start life as unclassified. They will be classified by
hardware when they run.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 kernel/sched/core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f730b6fe94a7f..7b63bf90430bb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4377,6 +4377,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.prev_sum_exec_runtime = 0;
 	p->se.nr_migrations = 0;
 	p->se.vruntime = 0;
+#ifdef CONFIG_IPC_CLASSES
+	p->ipcc = IPC_CLASS_UNCLASSIFIED;
+#endif
 	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
--
2.39.2

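As a reading aid, the declarations this one-liner relies on look roughly as follows. This is a sketch assuming the conventions used elsewhere in the series (class 0 reserved for "unclassified", an unsigned short field in task_struct); it is not the literal upstream code.

/* Sketch only: assumed shape of the declarations used above. */
#define IPC_CLASS_UNCLASSIFIED 0	/* assumed value: no class assigned yet */

struct task_struct {
	/* ... */
#ifdef CONFIG_IPC_CLASSES
	unsigned short ipcc;		/* IPC class from hardware feedback */
#endif
	/* ... */
};
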
From ef293bd0404885577a038daf0c011a57eeece3de Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:45 -0800
Subject: [PATCH] sched/core: Add user_tick as argument to scheduler_tick()

Differentiate between user and kernel ticks so that the scheduler updates
the IPC class of the current task during the former.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 include/linux/sched.h | 2 +-
 kernel/sched/core.c   | 2 +-
 kernel/time/timer.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ca0c32bf796fb..e58dc7503864c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -293,7 +293,7 @@ enum {
 	TASK_COMM_LEN = 16,
 };
 
-extern void scheduler_tick(void);
+extern void scheduler_tick(bool user_tick);
 
 #define MAX_SCHEDULE_TIMEOUT LONG_MAX
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7b63bf90430bb..0a8558421eba2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5492,7 +5492,7 @@ static inline u64 cpu_resched_latency(struct rq *rq) { return 0; }
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
  */
-void scheduler_tick(void)
+void scheduler_tick(bool user_tick)
 {
 	int cpu = smp_processor_id();
 	struct rq *rq = cpu_rq(cpu);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 717fcb9fb14aa..b444b6f5f585b 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1841,7 +1841,7 @@ void update_process_times(int user_tick)
 	if (in_irq())
 		irq_work_tick();
 #endif
-	scheduler_tick();
+	scheduler_tick(user_tick);
 	if (IS_ENABLED(CONFIG_POSIX_TIMERS))
 		run_posix_cpu_timers();
 }
--
2.39.2

From 45a0511c0378f765cc26fb1faaddc3080ab2e179 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:46 -0800
Subject: [PATCH] sched/core: Update the IPC class of the current task

When supported, hardware monitors the instruction stream to classify the
current task. Hence, at each user tick, we are ready to read the most
recent classification result for the current task.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 kernel/sched/core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0a8558421eba2..4782b1359eb89 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5504,6 +5504,9 @@ void scheduler_tick(bool user_tick)
 	if (housekeeping_cpu(cpu, HK_TYPE_TICK))
 		arch_scale_freq_tick();
 
+	if (sched_ipcc_enabled() && user_tick)
+		arch_update_ipcc(curr);
+
 	sched_clock_tick();
 
 	rq_lock(rq, &rf);
--
2.39.2

From b88b78dd46cf545386f984768597179451f2bdf0 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:47 -0800
Subject: [PATCH] sched/fair: Collect load-balancing stats for IPC classes

When selecting a busiest scheduling group, the IPC class of the current
task can be used to select between two otherwise identical scheduling
groups of type asym_packing or fully_busy.

Compute the IPC class performance score for a scheduling group. It
is the sum of the scores of the current tasks of all the runqueues.

Also, keep track of the class of the task with the lowest IPC class score
in the scheduling group.

These two metrics will be used during idle load balancing to compute the
current and the prospective IPC class score of a scheduling group.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 kernel/sched/fair.c | 61 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 635e8b41a87c9..86f779d9d2a90 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8751,6 +8751,11 @@ struct sg_lb_stats {
 	unsigned int nr_numa_running;
 	unsigned int nr_preferred_running;
 #endif
+#ifdef CONFIG_IPC_CLASSES
+	unsigned long min_score; /* Min(score(rq->curr->ipcc)) */
+	unsigned short min_ipcc; /* Class of the task with the minimum IPCC score in the rq */
+	unsigned long sum_score; /* Sum(score(rq->curr->ipcc)) */
+#endif
 };
 
 /*
@@ -9028,6 +9033,59 @@ group_type group_classify(unsigned int imbalance_pct,
 	return group_has_spare;
 }
 
+#ifdef CONFIG_IPC_CLASSES
+static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
+{
+	/* All IPCC stats have been set to zero in update_sg_lb_stats(). */
+	sgs->min_score = ULONG_MAX;
+}
+
+/* Called only if cpu_of(@rq) is not idle and has tasks running. */
+static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
+				    struct rq *rq)
+{
+	struct task_struct *curr;
+	unsigned short ipcc;
+	unsigned long score;
+
+	if (!sched_ipcc_enabled())
+		return;
+
+	curr = rcu_dereference(rq->curr);
+	if (!curr || (curr->flags & PF_EXITING) || is_idle_task(curr) ||
+	    task_is_realtime(curr) ||
+	    !cpumask_test_cpu(dst_cpu, curr->cpus_ptr))
+		return;
+
+	ipcc = curr->ipcc;
+	score = arch_get_ipcc_score(ipcc, cpu_of(rq));
+
+	/*
+	 * Ignore tasks with invalid scores. When finding the busiest group, we
+	 * prefer those with higher sum_score. This group will not be selected.
+	 */
+	if (IS_ERR_VALUE(score))
+		return;
+
+	sgs->sum_score += score;
+
+	if (score < sgs->min_score) {
+		sgs->min_score = score;
+		sgs->min_ipcc = ipcc;
+	}
+}
+
+#else /* CONFIG_IPC_CLASSES */
+static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
+				    struct rq *rq)
+{
+}
+
+static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
+{
+}
+#endif /* CONFIG_IPC_CLASSES */
+
 /**
  * asym_smt_can_pull_tasks - Check whether the load balancing CPU can pull tasks
  * @dst_cpu: Destination CPU of the load balancing
@@ -9120,6 +9178,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	int i, nr_running, local_group;
 
 	memset(sgs, 0, sizeof(*sgs));
+	init_rq_ipcc_stats(sgs);
 
 	local_group = group == sds->local;
 
@@ -9169,6 +9228,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 			if (sgs->group_misfit_task_load < load)
 				sgs->group_misfit_task_load = load;
 		}
+
+		update_sg_lb_ipcc_stats(env->dst_cpu, sgs, rq);
 	}
 
 	sgs->group_capacity = group->sgc->capacity;
--
2.39.2

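To see what these statistics capture, consider a user-space toy that mirrors the accumulation done by update_sg_lb_ipcc_stats() across the runqueues of one group. All scores and the CPU count are invented for illustration; it builds with any C compiler.

#include <limits.h>
#include <stdio.h>

int main(void)
{
	/* Current task on each of three busy CPUs: {IPC class, score}. */
	struct { unsigned short ipcc; unsigned long score; } curr[] = {
		{ 2, 380 }, { 1, 220 }, { 3, 410 },
	};
	unsigned long sum_score = 0, min_score = ULONG_MAX;
	unsigned short min_ipcc = 0;

	for (int i = 0; i < 3; i++) {
		sum_score += curr[i].score;
		if (curr[i].score < min_score) {
			min_score = curr[i].score;
			min_ipcc = curr[i].ipcc;
		}
	}

	/* sum_score = 1010 is the group's total throughput; min_score = 220
	 * (class 1) marks the cheapest current task to migrate away, which
	 * the next patch uses to compute the prospective score. */
	printf("sum=%lu min=%lu class=%u\n", sum_score, min_score,
	       (unsigned int)min_ipcc);
	return 0;
}
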
From ba3c46567c032ee843d8ab5f576ddc78df1a56bf Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:48 -0800
Subject: [PATCH] sched/fair: Compute IPC class scores for load balancing

Compute the joint total (both current and prospective) IPC class score of
a scheduling group and the local scheduling group.

These IPCC statistics are used during idle load balancing. The candidate
scheduling group will have one fewer busy CPU after load balancing. This
observation is important for cores with SMT support.

The IPCC score of scheduling groups composed of SMT siblings needs to
consider that the siblings share CPU resources. When computing the total
IPCC score of the scheduling group, divide the score of each sibling by
the number of busy siblings.

Collect IPCC statistics for asym_packing and fully_busy scheduling groups.
When picking a busiest group, they are used to break ties between otherwise
identical groups.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 kernel/sched/fair.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 86f779d9d2a90..3b84fb72891bc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8755,6 +8755,8 @@ struct sg_lb_stats {
 	unsigned long min_score; /* Min(score(rq->curr->ipcc)) */
 	unsigned short min_ipcc; /* Class of the task with the minimum IPCC score in the rq */
 	unsigned long sum_score; /* Sum(score(rq->curr->ipcc)) */
+	long ipcc_score_after; /* Prospective IPCC score after load balancing */
+	unsigned long ipcc_score_before; /* IPCC score before load balancing */
 #endif
 };
 
@@ -9075,6 +9077,62 @@ static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
 	}
 }
 
+static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
+				      struct sched_group *sg,
+				      struct lb_env *env)
+{
+	unsigned long score_on_dst_cpu, before;
+	int busy_cpus;
+	long after;
+
+	if (!sched_ipcc_enabled())
+		return;
+
+	/*
+	 * IPCC scores are only useful during idle load balancing. For now,
+	 * only asym_packing uses IPCC scores.
+	 */
+	if (!(env->sd->flags & SD_ASYM_PACKING) ||
+	    env->idle == CPU_NOT_IDLE)
+		return;
+
+	/*
+	 * IPCC scores are used to break ties only between these types of
+	 * groups.
+	 */
+	if (sgs->group_type != group_fully_busy &&
+	    sgs->group_type != group_asym_packing)
+		return;
+
+	busy_cpus = sgs->group_weight - sgs->idle_cpus;
+
+	/* No busy CPUs in the group. No tasks to move. */
+	if (!busy_cpus)
+		return;
+
+	score_on_dst_cpu = arch_get_ipcc_score(sgs->min_ipcc, env->dst_cpu);
+
+	/*
+	 * Do not use IPC scores. sgs::ipcc_score_{after, before} will be zero
+	 * and not used.
+	 */
+	if (IS_ERR_VALUE(score_on_dst_cpu))
+		return;
+
+	before = sgs->sum_score;
+	after = before - sgs->min_score;
+
+	/* SMT siblings share throughput. */
+	if (busy_cpus > 1 && sg->flags & SD_SHARE_CPUCAPACITY) {
+		before /= busy_cpus;
+		/* One sibling will become idle after load balance. */
+		after /= busy_cpus - 1;
+	}
+
+	sgs->ipcc_score_after = after + score_on_dst_cpu;
+	sgs->ipcc_score_before = before;
+}
+
 #else /* CONFIG_IPC_CLASSES */
 static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
 				    struct rq *rq)
@@ -9084,6 +9142,13 @@ static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
 static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
 {
 }
+
+static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
+				      struct sched_group *sg,
+				      struct lb_env *env)
+{
+}
+
 #endif /* CONFIG_IPC_CLASSES */
 
 /**
@@ -9245,6 +9310,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 
 	sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs);
 
+	if (!local_group)
+		update_sg_lb_stats_scores(sgs, group, env);
+
 	/* Computing avg_load makes sense only when group is overloaded */
 	if (sgs->group_type == group_overloaded)
 		sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) /
--
2.39.2

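The before/after arithmetic is easiest to see with numbers. The user-space sketch below mirrors the SMT branch of update_sg_lb_stats_scores() for an invented group with two busy siblings whose current tasks score 300 and 220, and a destination CPU where the migrating class scores 350.

#include <stdio.h>

int main(void)
{
	unsigned long sum_score = 300 + 220;	/* Sum over rq->curr tasks */
	unsigned long min_score = 220;		/* Task that would migrate */
	unsigned long score_on_dst_cpu = 350;	/* min_ipcc's score on dst_cpu */
	int busy_cpus = 2;			/* SMT siblings, both busy */

	unsigned long before = sum_score;
	long after = before - min_score;

	/* Siblings share throughput; one becomes idle after balancing.
	 * (The real code only divides when busy_cpus > 1.) */
	before /= busy_cpus;			/* 520 / 2 = 260 */
	after /= busy_cpus - 1;			/* 300 / 1 = 300 */

	after += score_on_dst_cpu;		/* 300 + 350 = 650 */
	printf("ipcc_score_before=%lu ipcc_score_after=%ld\n", before, after);
	return 0;
}

Balancing this group is thus predicted to raise its score from 260 to 650, a gain the next patch compares across candidate groups.
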
From 98664eb2ae97aaf9b290077ef9e6629a7c1d7d79 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:49 -0800
Subject: [PATCH] sched/fair: Use IPCC stats to break ties between asym_packing
 sched groups

As it iterates, update_sd_pick_busiest() keeps selecting as busiest
sched groups of identical priority. Since both groups have the same
priority, either group is a good choice. The IPCC statistics provide a
measure of the throughput before and after load balance. Use them to
pick a busiest scheduling group from otherwise identical asym_packing
scheduling groups.

Pick as busiest the scheduling group that yields a higher IPCC score
after load balancing.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 kernel/sched/fair.c | 72 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3b84fb72891bc..89a13ae0185e6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9133,6 +9133,60 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
 	sgs->ipcc_score_before = before;
 }
 
+/**
+ * sched_asym_ipcc_prefer - Select a sched group based on its IPCC score
+ * @a: Load balancing statistics of a sched group
+ * @b: Load balancing statistics of a second sched group
+ *
+ * Returns: true if @a has a higher IPCC score than @b after load balance.
+ * False otherwise.
+ */
+static bool sched_asym_ipcc_prefer(struct sg_lb_stats *a,
+				   struct sg_lb_stats *b)
+{
+	if (!sched_ipcc_enabled())
+		return false;
+
+	/* @a increases overall throughput after load balance. */
+	if (a->ipcc_score_after > b->ipcc_score_after)
+		return true;
+
+	/*
+	 * If @a and @b yield the same overall throughput, pick @a if
+	 * its current throughput is lower than that of @b.
+	 */
+	if (a->ipcc_score_after == b->ipcc_score_after)
+		return a->ipcc_score_before < b->ipcc_score_before;
+
+	return false;
+}
+
+/**
+ * sched_asym_ipcc_pick - Select a sched group based on its IPCC score
+ * @a: A scheduling group
+ * @b: A second scheduling group
+ * @a_stats: Load balancing statistics of @a
+ * @b_stats: Load balancing statistics of @b
+ *
+ * Returns: true if @a has the same priority and @a has tasks with IPC classes
+ * that yield higher overall throughput after load balance. False otherwise.
+ */
+static bool sched_asym_ipcc_pick(struct sched_group *a,
+				 struct sched_group *b,
+				 struct sg_lb_stats *a_stats,
+				 struct sg_lb_stats *b_stats)
+{
+	/*
+	 * Only use the class-specific preference selection if both sched
+	 * groups have the same priority.
+	 */
+	if (arch_asym_cpu_priority(a->asym_prefer_cpu) !=
+	    arch_asym_cpu_priority(b->asym_prefer_cpu))
+		return false;
+
+	return sched_asym_ipcc_prefer(a_stats, b_stats);
+}
+
 #else /* CONFIG_IPC_CLASSES */
 static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
 				    struct rq *rq)
@@ -9149,6 +9203,14 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
 {
 }
 
+static bool sched_asym_ipcc_pick(struct sched_group *a,
+				 struct sched_group *b,
+				 struct sg_lb_stats *a_stats,
+				 struct sg_lb_stats *b_stats)
+{
+	return false;
+}
+
 #endif /* CONFIG_IPC_CLASSES */
 
 /**
@@ -9384,6 +9446,16 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 		/* Prefer to move from lowest priority CPU's work */
 		if (sched_asym_prefer(sg->asym_prefer_cpu, sds->busiest->asym_prefer_cpu))
 			return false;
+
+		/*
+		 * Unlike other callers of sched_asym_prefer(), here both @sg
+		 * and @sds::busiest have tasks running. When they have equal
+		 * priority, their IPC class scores can be used to select a
+		 * better busiest.
+		 */
+		if (sched_asym_ipcc_pick(sds->busiest, sg, &sds->busiest_stat, sgs))
+			return false;
+
 		break;
 
 	case group_misfit_task:
--
2.39.2

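A small stand-alone rendition of the tie-break rule makes the two comparisons explicit; the statistics values below are invented.

#include <stdbool.h>
#include <stdio.h>

struct stats {
	long after;		/* prospective score after balancing */
	unsigned long before;	/* current score */
};

/* Mirrors the logic of sched_asym_ipcc_prefer(): a higher prospective
 * score wins; on a tie, the group currently yielding less throughput
 * wins, since helping it is the better deal. */
static bool prefer_a(const struct stats *a, const struct stats *b)
{
	if (a->after > b->after)
		return true;
	if (a->after == b->after)
		return a->before < b->before;
	return false;
}

int main(void)
{
	struct stats a = { .after = 650, .before = 260 };
	struct stats b = { .after = 650, .before = 300 };

	/* Equal gain, but @a currently runs at 260 vs. 300: pick @a. */
	printf("prefer a? %s\n", prefer_a(&a, &b) ? "yes" : "no");
	return 0;
}
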
From a4b0646c23237cc67703ed40bdde243d78bb7910 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:50 -0800
Subject: [PATCH] sched/fair: Use IPCC stats to break ties between fully_busy
 SMT groups

IPCC statistics are used during idle load balancing. After balancing, one
of the siblings of an SMT core will become idle. The rest of the busy
siblings will enjoy increased throughput. The IPCC statistics provide
a measure of the increased throughput. Use them to pick a busiest group
from otherwise identical fully_busy scheduling groups (whose avg_load is
equal, and zero).

Using IPCC scores to break ties with non-SMT fully_busy sched groups
is not necessary. SMT sched groups always need more help.

Add a stub sched_asym_ipcc_prefer() for !CONFIG_IPC_CLASSES.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 kernel/sched/fair.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 89a13ae0185e6..4f75e9964e8ca 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9203,6 +9203,12 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
 {
 }
 
+static bool sched_asym_ipcc_prefer(struct sg_lb_stats *a,
+				   struct sg_lb_stats *b)
+{
+	return false;
+}
+
 static bool sched_asym_ipcc_pick(struct sched_group *a,
 				 struct sched_group *b,
 				 struct sg_lb_stats *a_stats,
@@ -9486,10 +9492,21 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 		if (sgs->avg_load == busiest->avg_load) {
 			/*
 			 * SMT sched groups need more help than non-SMT groups.
-			 * If @sg happens to also be SMT, either choice is good.
 			 */
-			if (sds->busiest->flags & SD_SHARE_CPUCAPACITY)
-				return false;
+			if (sds->busiest->flags & SD_SHARE_CPUCAPACITY) {
+				if (!(sg->flags & SD_SHARE_CPUCAPACITY))
+					return false;
+
+				/*
+				 * Between two SMT groups, use IPCC scores to pick the
+				 * one that would improve throughput the most (only
+				 * asym_packing uses IPCC scores for now).
+				 */
+				if (sched_ipcc_enabled() &&
+				    env->sd->flags & SD_ASYM_PACKING &&
+				    sched_asym_ipcc_prefer(busiest, sgs))
+					return false;
+			}
 		}
 
 		break;
--
2.39.2

From cc96548bd32f891b0cdf162cdde5f8ca2ba98404 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:51 -0800
Subject: [PATCH] sched/fair: Use IPCC scores to select a busiest runqueue

For two runqueues of equal priority and equal number of running tasks,
select the one whose current task would have the highest IPC class score
if placed on the destination CPU.

For now, use IPCC scores only for scheduling domains with the
SD_ASYM_PACKING flag.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 kernel/sched/fair.c | 64 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4f75e9964e8ca..fc42b58f1ba42 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9187,6 +9187,37 @@ static bool sched_asym_ipcc_pick(struct sched_group *a,
 	return sched_asym_ipcc_prefer(a_stats, b_stats);
 }
 
+/**
+ * ipcc_score_delta - Get the IPCC score delta wrt the load balance's dst_cpu
+ * @p: A task
+ * @env: Load balancing environment
+ *
+ * Returns: The IPCC score delta that @p would get if placed in the destination
+ * CPU of @env. LONG_MIN to indicate that the delta should not be used.
+ */
+static long ipcc_score_delta(struct task_struct *p, struct lb_env *env)
+{
+	unsigned long score_src, score_dst;
+	unsigned short ipcc = p->ipcc;
+
+	if (!sched_ipcc_enabled())
+		return LONG_MIN;
+
+	/* Only asym_packing uses IPCC scores at the moment. */
+	if (!(env->sd->flags & SD_ASYM_PACKING))
+		return LONG_MIN;
+
+	score_dst = arch_get_ipcc_score(ipcc, env->dst_cpu);
+	if (IS_ERR_VALUE(score_dst))
+		return LONG_MIN;
+
+	score_src = arch_get_ipcc_score(ipcc, task_cpu(p));
+	if (IS_ERR_VALUE(score_src))
+		return LONG_MIN;
+
+	return score_dst - score_src;
+}
+
 #else /* CONFIG_IPC_CLASSES */
 static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
 				    struct rq *rq)
@@ -9217,6 +9248,11 @@ static bool sched_asym_ipcc_pick(struct sched_group *a,
 	return false;
 }
 
+static long ipcc_score_delta(struct task_struct *p, struct lb_env *env)
+{
+	return LONG_MIN;
+}
+
 #endif /* CONFIG_IPC_CLASSES */
 
 /**
@@ -10377,6 +10413,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long busiest_util = 0, busiest_load = 0, busiest_capacity = 1;
+	long busiest_ipcc_delta = LONG_MIN;
 	unsigned int busiest_nr = 0;
 	int i;
 
@@ -10493,8 +10530,35 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 		case migrate_task:
 			if (busiest_nr < nr_running) {
+				struct task_struct *curr;
+
 				busiest_nr = nr_running;
 				busiest = rq;
+
+				/*
+				 * Remember the IPCC score delta of busiest::curr.
+				 * We may need it to break a tie with other queues
+				 * with equal nr_running.
+				 */
+				curr = rcu_dereference(busiest->curr);
+				busiest_ipcc_delta = ipcc_score_delta(curr, env);
+			/*
+			 * If rq and busiest have the same number of running
+			 * tasks and IPC classes are supported, pick rq if doing
+			 * so would give rq::curr a bigger IPC boost on dst_cpu.
+			 */
+			} else if (busiest_nr == nr_running) {
+				struct task_struct *curr;
+				long delta;
+
+				curr = rcu_dereference(rq->curr);
+				delta = ipcc_score_delta(curr, env);
+
+				if (busiest_ipcc_delta < delta) {
+					busiest_ipcc_delta = delta;
+					busiest_nr = nr_running;
+					busiest = rq;
+				}
 			}
 			break;
--
2.39.2

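The runqueue tie-break reduces to one subtraction per candidate. With invented scores for each candidate runqueue's current task on its own CPU and on the destination CPU:

#include <stdio.h>

int main(void)
{
	/* delta = score on dst_cpu - score on the task's current CPU. */
	long rq1_delta = 410 - 256;	/* big boost if moved: 154 */
	long rq2_delta = 410 - 396;	/* barely any boost: 14  */

	/* With equal nr_running, keep the queue whose current task gains
	 * the most from running on dst_cpu. */
	printf("pick rq%d\n", rq1_delta > rq2_delta ? 1 : 2);
	return 0;
}
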
From 8435406d47c8fb7c349f7615ebb1cd0a812afc90 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:52 -0800
Subject: [PATCH] thermal: intel: hfi: Introduce Intel Thread Director classes

On Intel hybrid parts, each type of CPU has specific performance and
energy efficiency capabilities. The Intel Thread Director technology
extends the Hardware Feedback Interface (HFI) to provide performance and
energy efficiency data for advanced classes of instructions.

Add support to parse per-class capabilities.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 drivers/thermal/intel/intel_hfi.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 239afe02e5182..ae7eec197f680 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -79,7 +79,7 @@ union cpuid6_edx {
  * @ee_cap: Energy efficiency capability
  *
  * Capabilities of a logical processor in the HFI table. These capabilities are
- * unitless.
+ * unitless and specific to each HFI class.
  */
 struct hfi_cpu_data {
 	u8 perf_cap;
@@ -91,7 +91,8 @@ struct hfi_cpu_data {
  * @perf_updated: Hardware updated performance capabilities
  * @ee_updated: Hardware updated energy efficiency capabilities
 *
- * Properties of the data in an HFI table.
+ * Properties of the data in an HFI table. There is one header per
+ * HFI class.
 */
 struct hfi_hdr {
 	u8 perf_updated;
@@ -129,16 +130,21 @@ struct hfi_instance {
 
 /**
  * struct hfi_features - Supported HFI features
+ * @nr_classes: Number of classes supported
  * @nr_table_pages: Size of the HFI table in 4KB pages
  * @cpu_stride: Stride size to locate the capability data of a logical
  *              processor within the table (i.e., row stride)
+ * @class_stride: Stride size to locate a class within the capability
+ *                data of a logical processor or the HFI table header
  * @hdr_size: Size of the table header
 *
 * Parameters and supported features that are common to all HFI instances
 */
 struct hfi_features {
+	unsigned int nr_classes;
 	size_t nr_table_pages;
 	unsigned int cpu_stride;
+	unsigned int class_stride;
 	unsigned int hdr_size;
 };
@@ -325,8 +331,8 @@ static void init_hfi_cpu_index(struct hfi_cpu_info *info)
 }
 
 /*
- * The format of the HFI table depends on the number of capabilities that the
- * hardware supports. Keep a data structure to navigate the table.
+ * The format of the HFI table depends on the number of capabilities and classes
+ * that the hardware supports. Keep a data structure to navigate the table.
  */
 static void init_hfi_instance(struct hfi_instance *hfi_instance)
 {
@@ -507,18 +513,30 @@ static __init int hfi_parse_features(void)
 	/* The number of 4KB pages required by the table */
 	hfi_features.nr_table_pages = edx.split.table_pages + 1;
 
+	/*
+	 * Capability fields of an HFI class are grouped together. Classes are
+	 * contiguous in memory. Hence, use the number of supported features to
+	 * locate a specific class.
+	 */
+	hfi_features.class_stride = nr_capabilities;
+
+	/* For now, use only one class of the HFI table */
+	hfi_features.nr_classes = 1;
+
 	/*
 	 * The header contains change indications for each supported feature.
 	 * The size of the table header is rounded up to be a multiple of 8
 	 * bytes.
 	 */
-	hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
+	hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities *
+					     hfi_features.nr_classes, 8) * 8;
 
 	/*
 	 * Data of each logical processor is also rounded up to be a multiple
 	 * of 8 bytes.
 	 */
-	hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
+	hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities *
+					       hfi_features.nr_classes, 8) * 8;
 
 	return 0;
 }
--
2.39.2

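The layout math is worth a worked example. Assume (invented parameters) two capability bytes per class, matching the perf_cap and ee_cap fields, and four ITD classes; DIV_ROUND_UP is redefined so the sketch builds in user space:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int nr_capabilities = 2;	/* perf_cap + ee_cap, 1 byte each */
	unsigned int nr_classes = 4;		/* invented ITD class count */

	unsigned int class_stride = nr_capabilities;
	unsigned int hdr_size = DIV_ROUND_UP(nr_capabilities * nr_classes, 8) * 8;
	unsigned int cpu_stride = DIV_ROUND_UP(nr_capabilities * nr_classes, 8) * 8;

	/* The capability data of (cpu_index, class) then sits at byte offset
	 *   hdr_size + cpu_index * cpu_stride + class * class_stride
	 * from the start of the table, matching the navigation kept in
	 * hfi_features. Prints: hdr=8 cpu_stride=8 class_stride=2. */
	printf("hdr=%u cpu_stride=%u class_stride=%u\n",
	       hdr_size, cpu_stride, class_stride);
	return 0;
}
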
From 78b14aa7d839c69d3073e92c54445a085b22df4f Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:53 -0800
Subject: [PATCH] x86/cpufeatures: Add the Intel Thread Director feature
 definitions

Intel Thread Director (ITD) provides hardware resources to classify
the current task. The classification reflects the type of instructions that
a task currently executes.

ITD extends the Hardware Feedback Interface table to provide performance
and energy efficiency capabilities for each of the supported classes of
tasks.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 arch/x86/include/asm/cpufeatures.h       | 1 +
 arch/x86/include/asm/disabled-features.h | 8 +++++++-
 arch/x86/kernel/cpu/cpuid-deps.c         | 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 92729c38853d1..02f10cd5c7536 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -342,6 +342,7 @@
 #define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
 #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
 #define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */
+#define X86_FEATURE_ITD (14*32+23) /* Intel Thread Director */
 
 /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
 #define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 33d2cd04d2544..7a668f6d0502d 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -87,6 +87,12 @@
 # define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31))
 #endif
 
+#ifdef CONFIG_IPC_CLASSES
+# define DISABLE_ITD 0
+#else
+# define DISABLE_ITD (1 << (X86_FEATURE_ITD & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -104,7 +110,7 @@
 #define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
 #define DISABLED_MASK12 0
 #define DISABLED_MASK13 0
-#define DISABLED_MASK14 0
+#define DISABLED_MASK14 (DISABLE_ITD)
 #define DISABLED_MASK15 0
 #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
 			 DISABLE_ENQCMD)
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index c881bcafba7d7..f6f8a3cd4f2ce 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -78,6 +78,7 @@ static const struct cpuid_dep cpuid_deps[] = {
 	{ X86_FEATURE_XFD, X86_FEATURE_XSAVES },
 	{ X86_FEATURE_XFD, X86_FEATURE_XGETBV1 },
 	{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
+	{ X86_FEATURE_ITD, X86_FEATURE_HFI },
 	{}
 };
--
2.39.2

From b4ce6b73c21395712049b4eeb71e55bf617be6f2 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:54 -0800
Subject: [PATCH] thermal: intel: hfi: Store per-CPU IPCC scores

The scheduler reads the IPCC scores when balancing load. These reads can
be quite frequent. Hardware can also update the HFI table frequently.
Concurrent access may cause a lot of lock contention. It gets worse as the
number of CPUs increases.

Instead, create separate per-CPU IPCC scores that the scheduler can read
without the HFI table lock.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 drivers/thermal/intel/intel_hfi.c | 46 +++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index ae7eec197f680..e84dcfbef0dd9 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -29,6 +29,7 @@
 #include <linux/kernel.h>
 #include <linux/math.h>
 #include <linux/mutex.h>
+#include <linux/percpu.h>
 #include <linux/percpu-defs.h>
 #include <linux/printk.h>
 #include <linux/processor.h>
@@ -172,6 +173,43 @@ static struct workqueue_struct *hfi_updates_wq;
 #define HFI_UPDATE_INTERVAL HZ
 #define HFI_MAX_THERM_NOTIFY_COUNT 16
 
+#ifdef CONFIG_IPC_CLASSES
+static int __percpu *hfi_ipcc_scores;
+
+static int alloc_hfi_ipcc_scores(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_ITD))
+		return 0;
+
+	hfi_ipcc_scores = __alloc_percpu(sizeof(*hfi_ipcc_scores) *
+					 hfi_features.nr_classes,
+					 sizeof(*hfi_ipcc_scores));
+
+	return !hfi_ipcc_scores;
+}
+
+static void set_hfi_ipcc_score(void *caps, int cpu)
+{
+	int i, *hfi_class;
+
+	if (!cpu_feature_enabled(X86_FEATURE_ITD))
+		return;
+
+	hfi_class = per_cpu_ptr(hfi_ipcc_scores, cpu);
+
+	for (i = 0; i < hfi_features.nr_classes; i++) {
+		struct hfi_cpu_data *class_caps;
+
+		class_caps = caps + i * hfi_features.class_stride;
+		WRITE_ONCE(hfi_class[i], class_caps->perf_cap);
+	}
+}
+
+#else
+static int alloc_hfi_ipcc_scores(void) { return 0; }
+static void set_hfi_ipcc_score(void *caps, int cpu) { }
+#endif /* CONFIG_IPC_CLASSES */
+
 static void get_hfi_caps(struct hfi_instance *hfi_instance,
 			 struct thermal_genl_cpu_caps *cpu_caps)
 {
@@ -194,6 +232,8 @@ static void get_hfi_caps(struct hfi_instance *hfi_instance,
 		cpu_caps[i].efficiency = caps->ee_cap << 2;
 
 		++i;
+
+		set_hfi_ipcc_score(caps, cpu);
 	}
 	raw_spin_unlock_irq(&hfi_instance->table_lock);
 }
@@ -572,8 +612,14 @@ void __init intel_hfi_init(void)
 	if (!hfi_updates_wq)
 		goto err_nomem;
 
+	if (alloc_hfi_ipcc_scores())
+		goto err_ipcc;
+
 	return;
 
+err_ipcc:
+	destroy_workqueue(hfi_updates_wq);
+
 err_nomem:
 	for (j = 0; j < i; ++j) {
 		hfi_instance = &hfi_instances[j];
--
2.39.2

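The resulting access pattern pairs a locked writer with lockless readers: set_hfi_ipcc_score() publishes scores with WRITE_ONCE() while holding the instance's table_lock, and the scheduler side reads with READ_ONCE(), tolerating a momentarily stale value in exchange for no contention. A sketch of a reader in kernel context; the helper name is invented:

/* Sketch only: lockless read of one class's score for one CPU. */
static inline int example_read_ipcc_score(int __percpu *scores,
					  int cpu, int hfi_class)
{
	return READ_ONCE(per_cpu_ptr(scores, cpu)[hfi_class]);
}
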
From 8826c8ac9227c660003590b333f80a0d144cba2e Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:55 -0800
Subject: [PATCH] thermal: intel: hfi: Update the IPC class of the current task

Use Intel Thread Director classification to update the IPC class of a
task. Implement the arch_update_ipcc() interface of the scheduler.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 arch/x86/include/asm/topology.h   |  6 ++++++
 drivers/thermal/intel/intel_hfi.c | 32 +++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 458c891a82736..ffcdac3f398f0 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -227,4 +227,10 @@ void init_freq_invariance_cppc(void);
 #define arch_init_invariance_cppc init_freq_invariance_cppc
 #endif
 
+#if defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL)
+void intel_hfi_update_ipcc(struct task_struct *curr);
+
+#define arch_update_ipcc intel_hfi_update_ipcc
+#endif /* defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL) */
+
 #endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index e84dcfbef0dd9..f2de597b3118a 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -74,6 +74,17 @@ union cpuid6_edx {
 	u32 full;
 };
 
+#ifdef CONFIG_IPC_CLASSES
+union hfi_thread_feedback_char_msr {
+	struct {
+		u64 classid : 8;
+		u64 __reserved : 55;
+		u64 valid : 1;
+	} split;
+	u64 full;
+};
+#endif
+
 /**
  * struct hfi_cpu_data - HFI capabilities per CPU
  * @perf_cap: Performance capability
@@ -176,6 +187,27 @@ static struct workqueue_struct *hfi_updates_wq;
 #ifdef CONFIG_IPC_CLASSES
 static int __percpu *hfi_ipcc_scores;
 
+void intel_hfi_update_ipcc(struct task_struct *curr)
+{
+	union hfi_thread_feedback_char_msr msr;
+
+	/* We should not be here if ITD is not supported. */
+	if (!cpu_feature_enabled(X86_FEATURE_ITD)) {
+		pr_warn_once("task classification requested but not supported!");
+		return;
+	}
+
+	rdmsrl(MSR_IA32_HW_FEEDBACK_CHAR, msr.full);
+	if (!msr.split.valid)
+		return;
+
+	/*
+	 * 0 is a valid classification for Intel Thread Director. A scheduler
+	 * IPCC class of 0 means that the task is unclassified. Adjust.
+	 */
+	curr->ipcc = msr.split.classid + 1;
+}
+
 static int alloc_hfi_ipcc_scores(void)
 {
 	if (!cpu_feature_enabled(X86_FEATURE_ITD))
--
2.39.2

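The MSR decode can be checked in user space with the same union layout; the raw value below is fabricated (valid bit 63 set, hardware class 2):

#include <stdint.h>
#include <stdio.h>

union hfi_thread_feedback_char_msr {
	struct {
		uint64_t classid : 8;
		uint64_t __reserved : 55;
		uint64_t valid : 1;
	} split;
	uint64_t full;
};

int main(void)
{
	union hfi_thread_feedback_char_msr msr = {
		.full = (1ULL << 63) | 2,
	};

	/* Hardware class 2 becomes scheduler IPCC class 3: the +1 shift
	 * exists because IPCC class 0 means "unclassified". */
	if (msr.split.valid)
		printf("ipcc=%u\n", (unsigned int)msr.split.classid + 1);
	return 0;
}
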
From e6c5f3a62bb1fc563887cca4df785b49f77ef0af Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:56 -0800
Subject: [PATCH] thermal: intel: hfi: Report the IPC class score of a CPU

Implement the arch_get_ipcc_score() interface of the scheduler. Use the
performance capabilities of the extended Hardware Feedback Interface table
as the IPC score.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 arch/x86/include/asm/topology.h   |  2 ++
 drivers/thermal/intel/intel_hfi.c | 27 +++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index ffcdac3f398f0..c4fcd9c3c634f 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -229,8 +229,10 @@ void init_freq_invariance_cppc(void);
 
 #if defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL)
 void intel_hfi_update_ipcc(struct task_struct *curr);
+unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu);
 
 #define arch_update_ipcc intel_hfi_update_ipcc
+#define arch_get_ipcc_score intel_hfi_get_ipcc_score
 #endif /* defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL) */
 
 #endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index f2de597b3118a..e8ad2be47e82f 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -208,6 +208,33 @@ void intel_hfi_update_ipcc(struct task_struct *curr)
 	curr->ipcc = msr.split.classid + 1;
 }
 
+unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
+{
+	unsigned short hfi_class;
+	int *scores;
+
+	if (cpu < 0 || cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	if (ipcc == IPC_CLASS_UNCLASSIFIED)
+		return -EINVAL;
+
+	/*
+	 * Scheduler IPC classes start at 1. HFI classes start at 0.
+	 * See note in intel_hfi_update_ipcc().
+	 */
+	hfi_class = ipcc - 1;
+
+	if (hfi_class >= hfi_features.nr_classes)
+		return -EINVAL;
+
+	scores = per_cpu_ptr(hfi_ipcc_scores, cpu);
+	if (!scores)
+		return -ENODEV;
+
+	return READ_ONCE(scores[hfi_class]);
+}
+
 static int alloc_hfi_ipcc_scores(void)
 {
 	if (!cpu_feature_enabled(X86_FEATURE_ITD))
--
2.39.2

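On the consumer side, the score comes back as an unsigned long whose error cases must be screened with IS_ERR_VALUE(), mirroring what the fair-class patches above do. A kernel-context sketch; the helper name is invented:

/* Sketch only: fetch a usable score or fall back to zero. */
static unsigned long example_score_or_zero(unsigned short ipcc, int cpu)
{
	unsigned long score = arch_get_ipcc_score(ipcc, cpu);

	if (IS_ERR_VALUE(score))
		return 0;	/* invalid class or CPU: ignore this sample */

	return score;
}
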
From 502fea0f97180e7e5a9fb1a88d31659d30813348 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:57 -0800
Subject: [PATCH] thermal: intel: hfi: Define a default class for unclassified
 tasks

A task may be unclassified if it has been recently created, spends most of
its lifetime sleeping, or hardware has not provided a classification.

Most tasks will eventually be classified as the scheduler's IPC class 1
(HFI class 0). This class corresponds to the capabilities in the legacy,
classless, HFI table.

IPC class 1 is a reasonable choice until hardware provides an actual
classification. Meanwhile, the scheduler will place classes of tasks with
higher IPC scores on higher-performance CPUs.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 drivers/thermal/intel/intel_hfi.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index e8ad2be47e82f..0f6935705e559 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -187,6 +187,19 @@ static struct workqueue_struct *hfi_updates_wq;
 #ifdef CONFIG_IPC_CLASSES
 static int __percpu *hfi_ipcc_scores;

+/*
+ * A task may be unclassified if it has been recently created, spends most of
+ * its lifetime sleeping, or hardware has not provided a classification.
+ *
+ * Most tasks will be classified as scheduler's IPC class 1 (HFI class 0)
+ * eventually. Meanwhile, the scheduler will place classes of tasks with higher
+ * IPC scores on higher-performance CPUs.
+ *
+ * IPC class 1 is a reasonable choice. It matches the performance capability
+ * of the legacy, classless, HFI table.
+ */
+#define HFI_UNCLASSIFIED_DEFAULT 1
+
 void intel_hfi_update_ipcc(struct task_struct *curr)
 {
 	union hfi_thread_feedback_char_msr msr;
@@ -217,7 +230,7 @@ unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
 		return -EINVAL;

 	if (ipcc == IPC_CLASS_UNCLASSIFIED)
-		return -EINVAL;
+		ipcc = HFI_UNCLASSIFIED_DEFAULT;

 	/*
 	 * Scheduler IPC classes start at 1. HFI classes start at 0.
--
2.39.2

From 5c8a8a9dc0da32f5cb1321e5cd86e68c7221af74 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:58 -0800
Subject: [PATCH] thermal: intel: hfi: Enable the Intel Thread Director

Enable Intel Thread Director from the CPU hotplug callback: globally from
CPU0 and then enable the thread-classification hardware in each logical
processor individually.

Also, initialize the number of classes supported.

Let the scheduler know that it can start using IPC classes.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 arch/x86/include/asm/msr-index.h  |  2 ++
 drivers/thermal/intel/intel_hfi.c | 40 +++++++++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 117e4e977b55d..b4a5de303b88a 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1089,6 +1089,8 @@
 /* Hardware Feedback Interface */
 #define MSR_IA32_HW_FEEDBACK_PTR        0x17d0
 #define MSR_IA32_HW_FEEDBACK_CONFIG     0x17d1
+#define MSR_IA32_HW_FEEDBACK_THREAD_CONFIG 0x17d4
+#define MSR_IA32_HW_FEEDBACK_CHAR       0x17d2

 /* x2APIC locked status */
 #define MSR_IA32_XAPIC_DISABLE_STATUS   0xBD
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 0f6935705e559..21a0d246ca501 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -50,6 +50,8 @@
 /* Hardware Feedback Interface MSR configuration bits */
 #define HW_FEEDBACK_PTR_VALID_BIT               BIT(0)
 #define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT       BIT(0)
+#define HW_FEEDBACK_CONFIG_ITD_ENABLE_BIT       BIT(1)
+#define HW_FEEDBACK_THREAD_CONFIG_ENABLE_BIT    BIT(0)

 /* CPUID detection and enumeration definitions for HFI */

@@ -74,6 +76,15 @@ union cpuid6_edx {
        u32 full;
 };

+union cpuid6_ecx {
+       struct {
+               u32 dont_care0:8;
+               u32 nr_classes:8;
+               u32 dont_care1:16;
+       } split;
+       u32 full;
+};
+
 #ifdef CONFIG_IPC_CLASSES
 union hfi_thread_feedback_char_msr {
        struct {
@@ -498,6 +509,11 @@ void intel_hfi_online(unsigned int cpu)

        init_hfi_cpu_index(info);

+       if (cpu_feature_enabled(X86_FEATURE_ITD)) {
+               msr_val = HW_FEEDBACK_THREAD_CONFIG_ENABLE_BIT;
+               wrmsrl(MSR_IA32_HW_FEEDBACK_THREAD_CONFIG, msr_val);
+       }
+
        /*
         * Now check if the HFI instance of the package/die of @cpu has been
         * initialized (by checking its header). In such case, all we have to
@@ -553,8 +569,22 @@ void intel_hfi_online(unsigned int cpu)
         */
        rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
        msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
+
+       if (cpu_feature_enabled(X86_FEATURE_ITD))
+               msr_val |= HW_FEEDBACK_CONFIG_ITD_ENABLE_BIT;
+
        wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);

+       /*
+        * We have all we need to support IPC classes. Task classification is
+        * now working.
+        *
+        * All class scores are zero until after the first HFI update. That is
+        * OK. The scheduler queries these scores at every load balance.
+        */
+       if (cpu_feature_enabled(X86_FEATURE_ITD))
+               sched_enable_ipc_classes();
+
 unlock:
        mutex_unlock(&hfi_instance_lock);
        return;
@@ -632,8 +662,14 @@ static __init int hfi_parse_features(void)
         */
        hfi_features.class_stride = nr_capabilities;

-       /* For now, use only one class of the HFI table */
-       hfi_features.nr_classes = 1;
+       if (cpu_feature_enabled(X86_FEATURE_ITD)) {
+               union cpuid6_ecx ecx;
+
+               ecx.full = cpuid_ecx(CPUID_HFI_LEAF);
+               hfi_features.nr_classes = ecx.split.nr_classes;
+       } else {
+               hfi_features.nr_classes = 1;
+       }

        /*
         * The header contains change indications for each supported feature.
--
2.39.2

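hfi_parse_features() above pulls the class count out of CPUID leaf 6: ECX
bits 15:8 enumerate how many Thread Director classes the HFI table carries.
A hedged userspace probe of the same bitfield, reusing the union layout from
the patch (plain unsigned int instead of the kernel's u32); on hardware
without ITD it simply reports 0.

#include <cpuid.h>
#include <stdio.h>

union cpuid6_ecx {
        struct {
                unsigned int dont_care0:8;
                unsigned int nr_classes:8;
                unsigned int dont_care1:16;
        } split;
        unsigned int full;
};

int main(void)
{
        unsigned int eax, ebx, ecx, edx;
        union cpuid6_ecx c;

        if (!__get_cpuid(6, &eax, &ebx, &ecx, &edx))
                return 1;

        c.full = ecx;
        printf("ITD classes enumerated: %u\n", c.split.nr_classes);
        return 0;
}
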
From 721e6a04633384f5f0a837ee1347e99ba9f2357d Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:10:59 -0800
Subject: [PATCH] sched/task_struct: Add helpers for IPC classification

The unprocessed classification that hardware provides for a task may not
be usable by the scheduler: the classification may change too frequently or
architectures may want to consider extra factors. For instance, some
processors with Intel Thread Director need to consider the state of the SMT
siblings of a core.

Provide per-task helper variables that architectures can use to post-
process the classification that hardware provides.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 include/linux/sched.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e58dc7503864c..63c2f88f0168a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1535,7 +1535,17 @@ struct task_struct {
         * A hardware-defined classification of task that reflects but is
         * not identical to the number of instructions per cycle.
         */
-       unsigned short                  ipcc;
+       unsigned int                    ipcc : 9;
+       /*
+        * A candidate classification that arch-specific implementations
+        * qualify for correctness.
+        */
+       unsigned int                    ipcc_tmp : 9;
+       /*
+        * Counter to filter out transient candidate classifications
+        * of a task.
+        */
+       unsigned int                    ipcc_cntr : 14;
 #endif

        /*
--
2.39.2

From afb386225a0c507949d96ea5122003edce813bb0 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:11:00 -0800
Subject: [PATCH] sched/core: Initialize helpers of task classification

Just as tasks start life unclassified, initialize their auxiliary
classification variables as unclassified too.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 kernel/sched/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4782b1359eb89..d9a026845d51c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4379,6 +4379,8 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
        p->se.vruntime = 0;
 #ifdef CONFIG_IPC_CLASSES
        p->ipcc = IPC_CLASS_UNCLASSIFIED;
+       p->ipcc_tmp = IPC_CLASS_UNCLASSIFIED;
+       p->ipcc_cntr = 0;
 #endif

        INIT_LIST_HEAD(&p->se.group_node);
--
2.39.2

From 20d6343c567733d5a2704e1f4bb437acd115683e Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:11:01 -0800
Subject: [PATCH] sched/fair: Introduce sched_smt_siblings_idle()

x86 needs to know the idle state of the SMT siblings of a CPU to improve
the accuracy of IPCC classification. x86 implements support for IPC classes
in the thermal HFI driver.

Rename is_core_idle() to sched_smt_siblings_idle() and make it available
outside the scheduler code.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 include/linux/sched.h |  2 ++
 kernel/sched/fair.c   | 21 +++++++++++++++------
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 63c2f88f0168a..b049584f4c1a5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2446,4 +2446,6 @@ static inline void sched_core_fork(struct task_struct *p) { }

 extern void sched_set_stop_task(int cpu, struct task_struct *stop);

+extern bool sched_smt_siblings_idle(int cpu);
+
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fc42b58f1ba42..ff1fd953258bd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1049,7 +1049,14 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */

-static inline bool is_core_idle(int cpu)
+/**
+ * sched_smt_siblings_idle - Check whether SMT siblings of a CPU are idle
+ * @cpu: The CPU to check
+ *
+ * Returns true if all the SMT siblings of @cpu are idle or @cpu does not have
+ * SMT siblings. The idle state of @cpu is not considered.
+ */
+bool sched_smt_siblings_idle(int cpu)
 {
 #ifdef CONFIG_SCHED_SMT
        int sibling;
@@ -1755,7 +1762,7 @@ static inline int numa_idle_core(int idle_core, int cpu)
         * Prefer cores instead of packing HT siblings
         * and triggering future load balancing.
         */
-       if (is_core_idle(cpu))
+       if (sched_smt_siblings_idle(cpu))
                idle_core = cpu;

        return idle_core;
@@ -9306,7 +9313,8 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs
         * If the destination CPU has SMT siblings, env->idle != CPU_NOT_IDLE
         * is not sufficient. We need to make sure the whole core is idle.
         */
-       if (sds->local->flags & SD_SHARE_CPUCAPACITY && !is_core_idle(env->dst_cpu))
+       if (sds->local->flags & SD_SHARE_CPUCAPACITY &&
+           !sched_smt_siblings_idle(env->dst_cpu))
                return false;

        /* Only do SMT checks if either local or candidate have SMT siblings. */
@@ -10475,7 +10483,8 @@ static struct rq *find_busiest_queue(struct lb_env *env,
                    sched_asym_prefer(i, env->dst_cpu) &&
                    nr_running == 1) {
                        if (env->sd->flags & SD_SHARE_CPUCAPACITY ||
-                           (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && is_core_idle(i)))
+                           (!(env->sd->flags & SD_SHARE_CPUCAPACITY) &&
+                            sched_smt_siblings_idle(i)))
                                continue;
                }
@@ -10604,7 +10613,7 @@ asym_active_balance(struct lb_env *env)
         * busy sibling.
         */
        return sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
-              !is_core_idle(env->src_cpu);
+              !sched_smt_siblings_idle(env->src_cpu);
        }

        return false;
@@ -11351,7 +11360,7 @@ static void nohz_balancer_kick(struct rq *rq)
                         */
                        if (sd->flags & SD_SHARE_CPUCAPACITY ||
                            (!(sd->flags & SD_SHARE_CPUCAPACITY) &&
-                            is_core_idle(i))) {
+                            sched_smt_siblings_idle(i))) {
                                flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
                                goto unlock;
                        }
--
2.39.2

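The renamed helper's contract is easy to miss: it reports only on the
siblings of @cpu, never on @cpu itself. A toy userspace model of that check,
with the SMT topology and idle states faked as arrays (sibling_of and
cpu_idle are inventions for the example; the kernel walks cpu_smt_mask()
and available_idle_cpu() instead):

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

/* Fake topology: CPUs 0/2 and 1/3 are SMT sibling pairs. */
static const int sibling_of[NR_CPUS] = { 2, 3, 0, 1 };
static const bool cpu_idle[NR_CPUS] = { false, true, true, false };

/* True if all SMT siblings of @cpu are idle; @cpu itself is ignored. */
static bool smt_siblings_idle(int cpu)
{
        return cpu_idle[sibling_of[cpu]];
}

int main(void)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu%d: siblings idle = %d\n",
                       cpu, smt_siblings_idle(cpu));
        return 0;
}
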
From 0a346e6136d9ca6c52ef6061e5d24b231629cd67 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:11:02 -0800
Subject: [PATCH] thermal: intel: hfi: Implement model-specific checks for task
 classification

In Alder Lake and Raptor Lake, the result of thread classification is more
accurate when only one SMT sibling is busy. Classification results for
classes 2 and 3 are always reliable.

To avoid unnecessary migrations, only update the class of a task if it has
been the same for 4 consecutive user ticks.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 drivers/thermal/intel/intel_hfi.c | 60 ++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 21a0d246ca501..751b84b6b8fda 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -40,6 +40,7 @@
 #include <linux/workqueue.h>

 #include <asm/msr.h>
+#include <asm/intel-family.h>

 #include "../thermal_core.h"
 #include "intel_hfi.h"
@@ -211,9 +212,64 @@ static int __percpu *hfi_ipcc_scores;
  */
 #define HFI_UNCLASSIFIED_DEFAULT 1

+#define CLASS_DEBOUNCER_SKIPS 4
+
+/**
+ * debounce_and_update_class() - Process and update a task's classification
+ *
+ * @p:         The task whose classification will be updated
+ * @new_ipcc:  The new IPC classification
+ *
+ * Update the classification of @p with the new value that hardware provides.
+ * Only update the classification of @p if it has been the same during
+ * CLASS_DEBOUNCER_SKIPS consecutive ticks.
+ */
+static void debounce_and_update_class(struct task_struct *p, u8 new_ipcc)
+{
+       u16 debounce_skip;
+
+       /* The class of @p changed. Only restart the debounce counter. */
+       if (p->ipcc_tmp != new_ipcc) {
+               p->ipcc_cntr = 1;
+               goto out;
+       }
+
+       /*
+        * The class of @p did not change. Update it if it has been the same
+        * for CLASS_DEBOUNCER_SKIPS user ticks.
+        */
+       debounce_skip = p->ipcc_cntr + 1;
+       if (debounce_skip < CLASS_DEBOUNCER_SKIPS)
+               p->ipcc_cntr++;
+       else
+               p->ipcc = new_ipcc;
+
+out:
+       p->ipcc_tmp = new_ipcc;
+}
+
+static bool classification_is_accurate(u8 hfi_class, bool smt_siblings_idle)
+{
+       switch (boot_cpu_data.x86_model) {
+       case INTEL_FAM6_ALDERLAKE:
+       case INTEL_FAM6_ALDERLAKE_L:
+       case INTEL_FAM6_RAPTORLAKE:
+       case INTEL_FAM6_RAPTORLAKE_P:
+       case INTEL_FAM6_RAPTORLAKE_S:
+               if (hfi_class == 3 || hfi_class == 2 || smt_siblings_idle)
+                       return true;
+
+               return false;
+
+       default:
+               return true;
+       }
+}
+
 void intel_hfi_update_ipcc(struct task_struct *curr)
 {
        union hfi_thread_feedback_char_msr msr;
+       bool idle;

        /* We should not be here if ITD is not supported. */
        if (!cpu_feature_enabled(X86_FEATURE_ITD)) {
@@ -229,7 +285,9 @@ void intel_hfi_update_ipcc(struct task_struct *curr)
         * 0 is a valid classification for Intel Thread Director. A scheduler
         * IPCC class of 0 means that the task is unclassified. Adjust.
         */
-       curr->ipcc = msr.split.classid + 1;
+       idle = sched_smt_siblings_idle(task_cpu(curr));
+       if (classification_is_accurate(msr.split.classid, idle))
+               debounce_and_update_class(curr, msr.split.classid + 1);
 }

 unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
--
2.39.2

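The debounce above commits a new class only on the CLASS_DEBOUNCER_SKIPS-th
consecutive sighting; any flip restarts the count at 1. A self-contained
model of just that state machine, where struct task is a stand-in for the
three task_struct fields from the earlier patch:

#include <stdio.h>

#define CLASS_DEBOUNCER_SKIPS 4

struct task {
        unsigned int ipcc;      /* committed classification */
        unsigned int ipcc_tmp;  /* last candidate seen */
        unsigned int ipcc_cntr; /* consecutive sightings of ipcc_tmp */
};

static void debounce_and_update_class(struct task *p, unsigned char new_ipcc)
{
        if (p->ipcc_tmp != new_ipcc)
                p->ipcc_cntr = 1;               /* class changed: restart */
        else if (p->ipcc_cntr + 1 < CLASS_DEBOUNCER_SKIPS)
                p->ipcc_cntr++;                 /* same class: keep counting */
        else
                p->ipcc = new_ipcc;             /* stable long enough: commit */

        p->ipcc_tmp = new_ipcc;
}

int main(void)
{
        struct task p = { 0, 0, 0 };
        unsigned char ticks[] = { 2, 2, 2, 3, 3, 3, 3, 3 };

        /* The switch to class 3 is committed only at tick 6. */
        for (unsigned int i = 0; i < sizeof(ticks); i++) {
                debounce_and_update_class(&p, ticks[i]);
                printf("tick %u: hw=%u committed=%u\n", i, ticks[i], p.ipcc);
        }
        return 0;
}
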
From 0ae9abd0c6e2109c538599229c213539fb1386ca Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:11:03 -0800
Subject: [PATCH] x86/cpufeatures: Add feature bit for HRESET

The HRESET instruction prevents the classification of the current task
from influencing the classification of the next task when running serially
on the same logical processor.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/include/asm/msr-index.h   | 4 +++-
 arch/x86/kernel/cpu/scattered.c    | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 02f10cd5c7536..2fc261302f5cc 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -307,6 +307,7 @@
 #define X86_FEATURE_MSR_TSX_CTRL        (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
+#define X86_FEATURE_HRESET              (11*32+23) /* Hardware history reset instruction */

 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI            (12*32+ 4) /* AVX VNNI instructions */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b4a5de303b88a..4ef01ce8eadb5 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1092,6 +1092,9 @@
 #define MSR_IA32_HW_FEEDBACK_THREAD_CONFIG 0x17d4
 #define MSR_IA32_HW_FEEDBACK_CHAR       0x17d2

+/* Hardware History Reset */
+#define MSR_IA32_HW_HRESET_ENABLE       0x17da
+
 /* x2APIC locked status */
 #define MSR_IA32_XAPIC_DISABLE_STATUS   0xBD
 #define LEGACY_XAPIC_DISABLED           BIT(0) /*
@@ -1099,5 +1102,4 @@
                                                 * disabling x2APIC will cause
                                                 * a #GP
                                                 */
-
 #endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index fc01f81f6e2a3..b8ad312d36949 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
        { X86_FEATURE_EPB,              CPUID_ECX,  3, 0x00000006, 0 },
        { X86_FEATURE_INTEL_PPIN,       CPUID_EBX,  0, 0x00000007, 1 },
        { X86_FEATURE_RRSBA_CTRL,       CPUID_EDX,  2, 0x00000007, 2 },
+       { X86_FEATURE_HRESET,           CPUID_EAX, 22, 0x00000007, 1 },
        { X86_FEATURE_CQM_LLC,          CPUID_EDX,  1, 0x0000000f, 0 },
        { X86_FEATURE_CQM_OCCUP_LLC,    CPUID_EDX,  0, 0x0000000f, 1 },
        { X86_FEATURE_CQM_MBM_TOTAL,    CPUID_EDX,  1, 0x0000000f, 1 },
--
2.39.2

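The scattered.c entry above encodes where HRESET is enumerated:
CPUID.(EAX=7,ECX=1):EAX bit 22. For illustration, a userspace probe of that
exact bit, assuming GCC/Clang's <cpuid.h>:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* Leaf 7, subleaf 1: structured extended feature enumeration. */
        if (!__get_cpuid_count(7, 1, &eax, &ebx, &ecx, &edx))
                return 1;

        printf("HRESET supported: %s\n", (eax >> 22) & 1 ? "yes" : "no");
        return 0;
}
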
From dcf0f4ab904b1ef3090f2de141f55ff42bf9b0f2 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:11:04 -0800
Subject: [PATCH] x86/hreset: Configure history reset

Configure the MSR that controls the behavior of HRESET on each logical
processor.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 arch/x86/kernel/cpu/common.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c34bdba57993a..98b48cf80ec21 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -411,6 +411,26 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
        cr4_clear_bits(X86_CR4_UMIP);
 }

+static u32 hardware_history_features __ro_after_init;
+
+static __always_inline void setup_hreset(struct cpuinfo_x86 *c)
+{
+       if (!cpu_feature_enabled(X86_FEATURE_HRESET))
+               return;
+
+       /*
+        * Use on all CPUs the hardware history features that the boot
+        * CPU supports.
+        */
+       if (c == &boot_cpu_data)
+               hardware_history_features = cpuid_ebx(0x20);
+
+       if (!hardware_history_features)
+               return;
+
+       wrmsrl(MSR_IA32_HW_HRESET_ENABLE, hardware_history_features);
+}
+
 /* These bits should not change their value after CPU init is finished. */
 static const unsigned long cr4_pinned_mask =
        X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
@@ -1828,10 +1848,11 @@ static void identify_cpu(struct cpuinfo_x86 *c)
        /* Disable the PN if appropriate */
        squash_the_stupid_serial_number(c);

-       /* Set up SMEP/SMAP/UMIP */
+       /* Set up SMEP/SMAP/UMIP/HRESET */
        setup_smep(c);
        setup_smap(c);
        setup_umip(c);
+       setup_hreset(c);

        /* Enable FSGSBASE instructions if available. */
        if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
--
2.39.2

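setup_hreset() above caches CPUID(0x20).EBX, the mask of resettable history
components, from the boot CPU and writes it to MSR_IA32_HW_HRESET_ENABLE on
every CPU, so all CPUs reset the same components. An illustrative userspace
read of that leaf; on machines whose maximum CPUID leaf is below 0x20 the
probe simply fails and the program exits:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* Leaf 0x20: Hardware History Reset enumeration. */
        if (!__get_cpuid(0x20, &eax, &ebx, &ecx, &edx))
                return 1;

        printf("HRESET enable mask (CPUID(0x20).EBX): 0x%08x\n", ebx);
        return 0;
}
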
From d562f8856447bb5019dad771dab745821b383939 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Mon, 6 Feb 2023 21:11:05 -0800
Subject: [PATCH] x86/process: Reset hardware history in context switch

Reset the classification history of the current task when switching to the
next task. Hardware will start the classification of the next task from
scratch.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Patchset: intel-thread-director
---
 arch/x86/include/asm/hreset.h | 31 +++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/common.c  |  7 +++++++
 arch/x86/kernel/process_32.c  |  3 +++
 arch/x86/kernel/process_64.c  |  3 +++
 4 files changed, 44 insertions(+)
 create mode 100644 arch/x86/include/asm/hreset.h

diff --git a/arch/x86/include/asm/hreset.h b/arch/x86/include/asm/hreset.h
new file mode 100644
index 0000000000000..d68ca2fb8642b
--- /dev/null
+++ b/arch/x86/include/asm/hreset.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_HRESET_H
+#define _ASM_X86_HRESET_H
+
+/**
+ * HRESET - History reset. Available since binutils v2.36.
+ *
+ * Request the processor to reset the history of task classification on the
+ * current logical processor. The history components to be
+ * reset are specified in %eax. Only bits specified in CPUID(0x20).EBX
+ * and enabled in the IA32_HRESET_ENABLE MSR can be selected.
+ *
+ * The assembly code looks like:
+ *
+ *     hreset %eax
+ *
+ * The corresponding machine code looks like:
+ *
+ *     F3 0F 3A F0 ModRM Imm
+ *
+ * The value of ModRM is 0xc0 to specify %eax register addressing.
+ * The ignored immediate operand is set to 0.
+ *
+ * The instruction is documented in the Intel SDM.
+ */
+
+#define __ASM_HRESET ".byte 0xf3, 0xf, 0x3a, 0xf0, 0xc0, 0x0"
+
+void reset_hardware_history(void);
+
+#endif /* _ASM_X86_HRESET_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 98b48cf80ec21..67bf6fbae082a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -52,6 +52,7 @@
 #include <asm/cpu.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
+#include <asm/hreset.h>
 #include <asm/memtype.h>
 #include <asm/microcode.h>
 #include <asm/microcode_intel.h>
@@ -413,6 +414,12 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)

 static u32 hardware_history_features __ro_after_init;

+void reset_hardware_history(void)
+{
+       asm_inline volatile (ALTERNATIVE("", __ASM_HRESET, X86_FEATURE_HRESET)
+                            : : "a" (hardware_history_features) : "memory");
+}
+
 static __always_inline void setup_hreset(struct cpuinfo_x86 *c)
 {
        if (!cpu_feature_enabled(X86_FEATURE_HRESET))
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ceab14b6118f7..888cdb7624dcd 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -52,6 +52,7 @@
 #include <asm/switch_to.h>
 #include <asm/vm86.h>
 #include <asm/resctrl.h>
+#include <asm/hreset.h>
 #include <asm/proto.h>

 #include "process.h"
@@ -214,6 +215,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        /* Load the Intel cache allocation PQR MSR. */
        resctrl_sched_in(next_p);

+       reset_hardware_history();
+
        return prev_p;
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 7f94dbbc397b7..d0d3c33237475 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -53,6 +53,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/vdso.h>
 #include <asm/resctrl.h>
+#include <asm/hreset.h>
 #include <asm/unistd.h>
 #include <asm/fsgsbase.h>
 #ifdef CONFIG_IA32_EMULATION
@@ -658,6 +659,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        /* Load the Intel cache allocation PQR MSR. */
        resctrl_sched_in(next_p);

+       reset_hardware_history();
+
        return prev_p;
 }
--
2.39.2
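
Since the header above spells out the encoding, it can be checked without
executing the instruction (running HRESET on a CPU without the feature
would raise #UD; the ALTERNATIVE() above patches in NOPs on such CPUs). A
small program that just prints the byte sequence for hreset %eax:

#include <stdio.h>

static const unsigned char hreset_eax[] = {
        0xf3, 0x0f, 0x3a, 0xf0, /* F3 0F 3A F0: the HRESET opcode */
        0xc0,                   /* ModRM = 0xc0: %eax register addressing */
        0x00                    /* ignored immediate operand */
};

int main(void)
{
        printf("hreset %%eax encoding:");
        for (unsigned int i = 0; i < sizeof(hreset_eax); i++)
                printf(" %02x", hreset_eax[i]);
        printf("\n");
        return 0;
}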