- From 0289e120424c88695e731293bb4f2816bc1d8da6 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 20:58:29 -0800
- Subject: [PATCH] sched/fair: Generalize asym_packing logic for SMT cores
- When doing asym_packing load balancing between cores, all we care about is that
- the destination core is fully idle (including SMT siblings, if any) and
- that the busiest candidate scheduling group has exactly one busy CPU. It is
- irrelevant whether the candidate busiest core is non-SMT, SMT2, SMT4, SMT8,
- etc.
- Do not handle the candidate busiest non-SMT vs SMT cases separately. Simply
- do the two checks described above. Let find_busiest_group() handle bigger
- imbalances in the number of idle CPUs.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-kernel@vger.kernel.org
- Reviewed-by: Len Brown <len.brown@intel.com>
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Tested-by: Zhang Rui <rui.zhang@intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/fair.c | 41 ++++++++++++++---------------------------
- 1 file changed, 14 insertions(+), 27 deletions(-)
- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
- index 0f87369914274..4509086a60a0d 100644
- --- a/kernel/sched/fair.c
- +++ b/kernel/sched/fair.c
- @@ -9124,13 +9124,11 @@ group_type group_classify(unsigned int imbalance_pct,
- * the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
- * only if @dst_cpu has higher priority.
- *
- - * If both @dst_cpu and @sg have SMT siblings, and @sg has exactly one more
- - * busy CPU than @sds::local, let @dst_cpu pull tasks if it has higher priority.
- - * Bigger imbalances in the number of busy CPUs will be dealt with in
- - * update_sd_pick_busiest().
- - *
- - * If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings
- - * of @dst_cpu are idle and @sg has lower priority.
- + * If @dst_cpu has SMT siblings, check if there are no running tasks in
- + * @sds::local. In such case, decide based on the priority of @sg. Do it only
- + * if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
- + * imbalances in the number of busy CPUs will be dealt with in
- + * find_busiest_group().
- *
- * Return: true if @dst_cpu can pull tasks, false otherwise.
- */
- @@ -9139,12 +9137,10 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
- struct sched_group *sg)
- {
- #ifdef CONFIG_SCHED_SMT
- - bool local_is_smt, sg_is_smt;
- + bool local_is_smt;
- int sg_busy_cpus;
-
- local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
- - sg_is_smt = sg->flags & SD_SHARE_CPUCAPACITY;
- -
- sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
-
- if (!local_is_smt) {
- @@ -9165,25 +9161,16 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
- return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
- }
-
- - /* @dst_cpu has SMT siblings. */
- -
- - if (sg_is_smt) {
- - int local_busy_cpus = sds->local->group_weight -
- - sds->local_stat.idle_cpus;
- - int busy_cpus_delta = sg_busy_cpus - local_busy_cpus;
- -
- - if (busy_cpus_delta == 1)
- - return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
- -
- - return false;
- - }
- -
- /*
- - * @sg does not have SMT siblings. Ensure that @sds::local does not end
- - * up with more than one busy SMT sibling and only pull tasks if there
- - * are not busy CPUs (i.e., no CPU has running tasks).
- + * @dst_cpu has SMT siblings. Do asym_packing load balancing only if
- + * all its siblings are idle (moving tasks between physical cores in
- + * which some SMT siblings are busy results in the same throughput).
- + *
- + * If the difference in the number of busy CPUs is two or more, let
- + * find_busiest_group() take care of it. We only care if @sg has
- + * exactly one busy CPU. This covers SMT and non-SMT sched groups.
- */
- - if (!sds->local_stat.sum_nr_running)
- + if (sg_busy_cpus == 1 && !sds->local_stat.sum_nr_running)
- return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
-
- return false;
- --
- 2.39.2
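- As a rough illustration only (the struct, helper names, and values below are
- invented for this sketch and are not kernel API), the generalized rule of the
- patch above can be modeled in plain C: the destination core must be fully
- idle, and the candidate busiest group must have exactly one busy CPU,
- regardless of whether that group is non-SMT, SMT2, SMT4, or SMT8.
-
- #include <stdbool.h>
- #include <stdio.h>
-
- /* Toy scheduling group: total CPUs and how many of them are idle. */
- struct toy_group {
- 	int weight;
- 	int idle_cpus;
- };
-
- /*
-  * Model of the generalized check: pull only when the destination core is
-  * fully idle and the busiest candidate group has exactly one busy CPU.
-  * Bigger imbalances are left to find_busiest_group().
-  */
- static bool can_pull(const struct toy_group *busiest,
- 		     bool dst_core_fully_idle, bool dst_has_higher_prio)
- {
- 	int busy = busiest->weight - busiest->idle_cpus;
-
- 	return dst_core_fully_idle && busy == 1 && dst_has_higher_prio;
- }
-
- int main(void)
- {
- 	struct toy_group smt4 = { .weight = 4, .idle_cpus = 3 };
-
- 	printf("%d\n", can_pull(&smt4, true, true));  /* 1: one busy CPU, pull */
- 	printf("%d\n", can_pull(&smt4, false, true)); /* 0: destination core busy */
- 	return 0;
- }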
- From e9da5836d3052648536258be7fbaec9f2f15862e Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 20:58:30 -0800
- Subject: [PATCH] sched/fair: Move is_core_idle() out of CONFIG_NUMA
- asym_packing needs this function to determine whether an SMT core is a
- suitable destination for load balancing.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Tested-by: Zhang Rui <rui.zhang@intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/fair.c | 34 +++++++++++++++++-----------------
- 1 file changed, 17 insertions(+), 17 deletions(-)
- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
- index 4509086a60a0d..d58df9c6a88c4 100644
- --- a/kernel/sched/fair.c
- +++ b/kernel/sched/fair.c
- @@ -1064,6 +1064,23 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
- * Scheduling class queueing methods:
- */
-
- +static inline bool is_core_idle(int cpu)
- +{
- +#ifdef CONFIG_SCHED_SMT
- + int sibling;
- +
- + for_each_cpu(sibling, cpu_smt_mask(cpu)) {
- + if (cpu == sibling)
- + continue;
- +
- + if (!idle_cpu(sibling))
- + return false;
- + }
- +#endif
- +
- + return true;
- +}
- +
- #ifdef CONFIG_NUMA
- #define NUMA_IMBALANCE_MIN 2
-
- @@ -1700,23 +1717,6 @@ struct numa_stats {
- int idle_cpu;
- };
-
- -static inline bool is_core_idle(int cpu)
- -{
- -#ifdef CONFIG_SCHED_SMT
- - int sibling;
- -
- - for_each_cpu(sibling, cpu_smt_mask(cpu)) {
- - if (cpu == sibling)
- - continue;
- -
- - if (!idle_cpu(sibling))
- - return false;
- - }
- -#endif
- -
- - return true;
- -}
- -
- struct task_numa_env {
- struct task_struct *p;
-
- --
- 2.39.2
- From e0ad77720e1ed2dc413aa9229442e8df0ee0f6ac Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 20:58:31 -0800
- Subject: [PATCH] sched/fair: Only do asym_packing load balancing from fully
- idle SMT cores
- When balancing load between cores, all the SMT siblings of the destination
- CPU, if any, must be idle. Otherwise, pulling new tasks degrades the
- throughput of the busy SMT siblings. The overall throughput of the system
- remains the same.
- When balancing load within an SMT core, this consideration is not relevant.
- Follow the priorities that hardware indicates.
- Using is_core_idle() renders checking !sds->local_stat.sum_nr_running
- redundant. Remove it.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-kernel@vger.kernel.org
- Suggested-by: Valentin Schneider <vschneid@redhat.com>
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Tested-by: Zhang Rui <rui.zhang@intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/fair.c | 34 +++++++++++++++++++++++++---------
- 1 file changed, 25 insertions(+), 9 deletions(-)
- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
- index d58df9c6a88c4..1b134a2f0585b 100644
- --- a/kernel/sched/fair.c
- +++ b/kernel/sched/fair.c
- @@ -9120,12 +9120,14 @@ group_type group_classify(unsigned int imbalance_pct,
- * Check the state of the SMT siblings of both @sds::local and @sg and decide
- * if @dst_cpu can pull tasks.
- *
- + * This function must be called only if all the SMT siblings of @dst_cpu are
- + * idle, if any.
- + *
- * If @dst_cpu does not have SMT siblings, it can pull tasks if two or more of
- * the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
- * only if @dst_cpu has higher priority.
- *
- - * If @dst_cpu has SMT siblings, check if there are no running tasks in
- - * @sds::local. In such case, decide based on the priority of @sg. Do it only
- + * If @dst_cpu has SMT siblings, decide based on the priority of @sg. Do it only
- * if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
- * imbalances in the number of busy CPUs will be dealt with in
- * find_busiest_group().
- @@ -9162,15 +9164,13 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
- }
-
- /*
- - * @dst_cpu has SMT siblings. Do asym_packing load balancing only if
- - * all its siblings are idle (moving tasks between physical cores in
- - * which some SMT siblings are busy results in the same throughput).
- + * @dst_cpu has SMT siblings and are also idle.
- *
- * If the difference in the number of busy CPUs is two or more, let
- * find_busiest_group() take care of it. We only care if @sg has
- * exactly one busy CPU. This covers SMT and non-SMT sched groups.
- */
- - if (sg_busy_cpus == 1 && !sds->local_stat.sum_nr_running)
- + if (sg_busy_cpus == 1)
- return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
-
- return false;
- @@ -9184,7 +9184,14 @@ static inline bool
- sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs,
- struct sched_group *group)
- {
- - /* Only do SMT checks if either local or candidate have SMT siblings */
- + /*
- + * If the destination CPU has SMT siblings, env->idle != CPU_NOT_IDLE
- + * is not sufficient. We need to make sure the whole core is idle.
- + */
- + if (sds->local->flags & SD_SHARE_CPUCAPACITY && !is_core_idle(env->dst_cpu))
- + return false;
- +
- + /* Only do SMT checks if either local or candidate have SMT siblings. */
- if ((sds->local->flags & SD_SHARE_CPUCAPACITY) ||
- (group->flags & SD_SHARE_CPUCAPACITY))
- return asym_smt_can_pull_tasks(env->dst_cpu, sds, sgs, group);
- @@ -11131,8 +11138,17 @@ static void nohz_balancer_kick(struct rq *rq)
- */
- for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
- if (sched_asym_prefer(i, cpu)) {
- - flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
- - goto unlock;
- + /*
- + * Always do ASYM_PACKING balance in the SMT
- + * domain. In upper domains, the core must be
- + * fully idle.
- + */
- + if (sd->flags & SD_SHARE_CPUCAPACITY ||
- + (!(sd->flags & SD_SHARE_CPUCAPACITY) &&
- + is_core_idle(i))) {
- + flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
- + goto unlock;
- + }
- }
- }
- }
- --
- 2.39.2
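- The kick condition added above reads densely; since A || (!A && B) is just
- A || B, it reduces to: the SMT domain, or a fully idle core. A minimal sketch
- of the decision (names invented, not kernel API):
-
- #include <stdbool.h>
-
- /*
-  * Illustrative model of the nohz kick rule: in the SMT domain,
-  * asym_packing balance always follows priorities; in wider domains the
-  * preferred CPU must also sit on a fully idle core.
-  */
- bool should_kick(bool smt_domain, bool dst_core_idle, bool higher_prio)
- {
- 	return higher_prio && (smt_domain || dst_core_idle);
- }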
- From 6894e2e70bb2dfe0a96d65a70c1e9a4005528211 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 20:58:32 -0800
- Subject: [PATCH] sched/fair: Let low-priority cores help high-priority busy
- SMT cores
- Using asym_packing priorities within an SMT core is straightforward. Just
- follow the priorities that hardware indicates.
- When balancing load from an SMT core, also consider the idle state of its
- siblings. Priorities do not reflect that an SMT core divides its throughput
- among all its busy siblings. They only make sense when exactly one sibling
- is busy.
- Indicate that active balance is needed if the destination CPU has lower
- priority than the source CPU but the latter has busy SMT siblings.
- Make find_busiest_queue() not skip higher-priority SMT cores with more than
- one busy sibling.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-kernel@vger.kernel.org
- Suggested-by: Valentin Schneider <vschneid@redhat.com>
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Tested-by: Zhang Rui <rui.zhang@intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/fair.c | 31 ++++++++++++++++++++++++++-----
- 1 file changed, 26 insertions(+), 5 deletions(-)
- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
- index 1b134a2f0585b..1255d99877fea 100644
- --- a/kernel/sched/fair.c
- +++ b/kernel/sched/fair.c
- @@ -10306,11 +10306,20 @@ static struct rq *find_busiest_queue(struct lb_env *env,
- nr_running == 1)
- continue;
-
- - /* Make sure we only pull tasks from a CPU of lower priority */
- + /*
- + * Make sure we only pull tasks from a CPU of lower priority
- + * when balancing between SMT siblings.
- + *
- + * If balancing between cores, let lower priority CPUs help
- + * SMT cores with more than one busy sibling.
- + */
- if ((env->sd->flags & SD_ASYM_PACKING) &&
- sched_asym_prefer(i, env->dst_cpu) &&
- - nr_running == 1)
- - continue;
- + nr_running == 1) {
- + if (env->sd->flags & SD_SHARE_CPUCAPACITY ||
- + (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && is_core_idle(i)))
- + continue;
- + }
-
- switch (env->migration_type) {
- case migrate_load:
- @@ -10400,8 +10409,20 @@ asym_active_balance(struct lb_env *env)
- * lower priority CPUs in order to pack all tasks in the
- * highest priority CPUs.
- */
- - return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
- - sched_asym_prefer(env->dst_cpu, env->src_cpu);
- + if (env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING)) {
- + /* Always obey priorities between SMT siblings. */
- + if (env->sd->flags & SD_SHARE_CPUCAPACITY)
- + return sched_asym_prefer(env->dst_cpu, env->src_cpu);
- +
- + /*
- + * A lower priority CPU can help an SMT core with more than one
- + * busy sibling.
- + */
- + return sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
- + !is_core_idle(env->src_cpu);
- + }
- +
- + return false;
- }
-
- static inline bool
- --
- 2.39.2
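- The restructured asym_active_balance() decision can be summarized as: obey
- priorities within an SMT core; between cores, a lower-priority destination
- may also help when the source core has busy siblings. A hedged sketch of
- that logic (parameter names invented, not kernel API):
-
- #include <stdbool.h>
-
- /*
-  * Model of the updated asym_active_balance() decision. Between SMT
-  * siblings only priorities matter; between cores, a lower-priority
-  * destination may still help a source core whose siblings are busy.
-  */
- bool asym_active_balance_model(bool dst_idle, bool asym_packing,
- 			       bool smt_domain, bool dst_higher_prio,
- 			       bool src_core_idle)
- {
- 	if (!dst_idle || !asym_packing)
- 		return false;
-
- 	if (smt_domain)
- 		return dst_higher_prio;
-
- 	return dst_higher_prio || !src_core_idle;
- }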
- From aacb4416f1e6e04c9ef67e06855b7a4c26d33e3d Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 20:58:33 -0800
- Subject: [PATCH] sched/fair: Keep a fully_busy SMT sched group as busiest
- When comparing two fully_busy scheduling groups, keep the current busiest
- group if it represents an SMT core. Tasks in such a scheduling group share
- CPU resources and need more help than tasks in a non-SMT fully_busy group.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Tested-by: Zhang Rui <rui.zhang@intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/fair.c | 16 ++++++++++++++--
- 1 file changed, 14 insertions(+), 2 deletions(-)
- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
- index 1255d99877fea..ed1f13fa32f86 100644
- --- a/kernel/sched/fair.c
- +++ b/kernel/sched/fair.c
- @@ -9384,10 +9384,22 @@ static bool update_sd_pick_busiest(struct lb_env *env,
- * contention when accessing shared HW resources.
- *
- * XXX for now avg_load is not computed and always 0 so we
- - * select the 1st one.
- + * select the 1st one, except if @sg is composed of SMT
- + * siblings.
- */
- - if (sgs->avg_load <= busiest->avg_load)
- +
- + if (sgs->avg_load < busiest->avg_load)
- return false;
- +
- + if (sgs->avg_load == busiest->avg_load) {
- + /*
- + * SMT sched groups need more help than non-SMT groups.
- + * If @sg happens to also be SMT, either choice is good.
- + */
- + if (sds->busiest->flags & SD_SHARE_CPUCAPACITY)
- + return false;
- + }
- +
- break;
-
- case group_has_spare:
- --
- 2.39.2
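- Since avg_load is not computed for fully_busy groups (it stays 0), the
- comparison effectively becomes a tie-break. A small model of the new choice,
- with illustrative names only:
-
- #include <stdbool.h>
-
- /*
-  * Tie-break model: with avg_load always 0 for fully_busy groups, keep the
-  * current busiest when it is an SMT core, since its tasks share core
-  * resources and need more help.
-  */
- bool pick_new_busiest(unsigned long sg_avg_load,
- 		      unsigned long busiest_avg_load, bool busiest_is_smt)
- {
- 	if (sg_avg_load < busiest_avg_load)
- 		return false;
-
- 	if (sg_avg_load == busiest_avg_load && busiest_is_smt)
- 		return false; /* keep the SMT group as busiest */
-
- 	return true;
- }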
- From 6a40621091eafca8bc7d4ac2f178971046744a58 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 20:58:34 -0800
- Subject: [PATCH] sched/fair: Use the prefer_sibling flag of the current sched
- domain
- SD_PREFER_SIBLING is set from the SMT scheduling domain up to the first
- non-NUMA domain (the exception is systems with SD_ASYM_CPUCAPACITY).
- Above the SMT sched domain, all domains have a child. SD_PREFER_SIBLING is
- always honored, regardless of the scheduling domain at which the load
- balance takes place.
- There are cases, however, in which the busiest CPU's sched domain has a
- child but the destination CPU's does not. Consider, for instance, a non-SMT
- core (or an SMT core with only one online sibling) doing load balance with
- an SMT core at the MC level. SD_PREFER_SIBLING will not be honored. We are
- left with a fully busy SMT core and an idle non-SMT core.
- Avoid inconsistent behavior. Use the prefer_sibling behavior at the current
- scheduling domain, not its child.
- The NUMA sched domain does not have the SD_PREFER_SIBLING flag. Thus, we
- will not spread load among NUMA sched groups, as desired.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-kernel@vger.kernel.org
- Suggested-by: Valentin Schneider <vschneid@redhat.com>
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Tested-by: Zhang Rui <rui.zhang@intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/fair.c | 10 +++++-----
- 1 file changed, 5 insertions(+), 5 deletions(-)
- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
- index ed1f13fa32f86..9d94ba3f67269 100644
- --- a/kernel/sched/fair.c
- +++ b/kernel/sched/fair.c
- @@ -9874,7 +9874,6 @@ static void update_idle_cpu_scan(struct lb_env *env,
-
- static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds)
- {
- - struct sched_domain *child = env->sd->child;
- struct sched_group *sg = env->sd->groups;
- struct sg_lb_stats *local = &sds->local_stat;
- struct sg_lb_stats tmp_sgs;
- @@ -9915,9 +9914,11 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
- sg = sg->next;
- } while (sg != env->sd->groups);
-
- - /* Tag domain that child domain prefers tasks go to siblings first */
- - sds->prefer_sibling = child && child->flags & SD_PREFER_SIBLING;
- -
- + /*
- + * Tag domain that @env::sd prefers to spread excess tasks among
- + * sibling sched groups.
- + */
- + sds->prefer_sibling = env->sd->flags & SD_PREFER_SIBLING;
-
- if (env->sd->flags & SD_NUMA)
- env->fbq_type = fbq_classify_group(&sds->busiest_stat);
- @@ -10216,7 +10217,6 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
- goto out_balanced;
- }
-
- - /* Try to move all excess tasks to child's sibling domain */
- if (sds.prefer_sibling && local->group_type == group_has_spare &&
- busiest->sum_nr_running > local->sum_nr_running + 1)
- goto force_balance;
- --
- 2.39.2
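- With prefer_sibling now taken from @env::sd itself, the force-balance trigger
- in find_busiest_group() fires whenever the busiest group runs at least two
- more tasks than the local one. A toy check of the motivating case (all counts
- and names invented for illustration):
-
- #include <stdbool.h>
- #include <stdio.h>
-
- /* Toy version of the prefer_sibling trigger in find_busiest_group(). */
- bool force_balance(bool prefer_sibling, bool local_has_spare,
- 		   unsigned int busiest_running, unsigned int local_running)
- {
- 	return prefer_sibling && local_has_spare &&
- 	       busiest_running > local_running + 1;
- }
-
- int main(void)
- {
- 	/* Fully busy SMT2 core (2 tasks) vs. an idle non-SMT core (0 tasks). */
- 	printf("%d\n", force_balance(true, true, 2, 0)); /* 1: spread the load */
- 	return 0;
- }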
- From b35c1dc0c7b494d014ffbc6e310506fb8c1b3457 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 20:58:35 -0800
- Subject: [PATCH] sched/fair: Do not even the number of busy CPUs via
- asym_packing
- Now that find_busiest_group() triggers load balancing between a fully_busy
- SMT2 core and an idle non-SMT core, there is no longer a need to force
- balancing via asym_packing. Use asym_packing only as intended: when there
- is a high-priority CPU that is idle.
- After this change, the same logic applies to SMT and non-SMT local groups.
- Simplify asym_smt_can_pull_tasks() accordingly.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Tested-by: Zhang Rui <rui.zhang@intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/fair.c | 37 +++++--------------------------------
- 1 file changed, 5 insertions(+), 32 deletions(-)
- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
- index 9d94ba3f67269..e5079ee882ff8 100644
- --- a/kernel/sched/fair.c
- +++ b/kernel/sched/fair.c
- @@ -9117,20 +9117,15 @@ group_type group_classify(unsigned int imbalance_pct,
- * @sgs: Load-balancing statistics of the candidate busiest group
- * @sg: The candidate busiest group
- *
- - * Check the state of the SMT siblings of both @sds::local and @sg and decide
- - * if @dst_cpu can pull tasks.
- + * Check the state of the SMT siblings of @sg and decide if @dst_cpu can pull
- + * tasks.
- *
- * This function must be called only if all the SMT siblings of @dst_cpu are
- * idle, if any.
- *
- - * If @dst_cpu does not have SMT siblings, it can pull tasks if two or more of
- - * the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
- - * only if @dst_cpu has higher priority.
- - *
- - * If @dst_cpu has SMT siblings, decide based on the priority of @sg. Do it only
- - * if @sg has exactly one busy CPU (i.e., one more than @sds::local). Bigger
- - * imbalances in the number of busy CPUs will be dealt with in
- - * find_busiest_group().
- + * @dst_cpu can pull tasks if @sg has exactly one busy CPU (i.e., one more than
- + * @sds::local) and has lower group priority than @sds::local. Bigger imbalances
- + * in the number of busy CPUs will be dealt with in find_busiest_group().
- *
- * Return: true if @dst_cpu can pull tasks, false otherwise.
- */
- @@ -9139,33 +9134,11 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
- struct sched_group *sg)
- {
- #ifdef CONFIG_SCHED_SMT
- - bool local_is_smt;
- int sg_busy_cpus;
-
- - local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
- sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
-
- - if (!local_is_smt) {
- - /*
- - * If we are here, @dst_cpu is idle and does not have SMT
- - * siblings. Pull tasks if candidate group has two or more
- - * busy CPUs.
- - */
- - if (sg_busy_cpus >= 2) /* implies sg_is_smt */
- - return true;
- -
- - /*
- - * @dst_cpu does not have SMT siblings. @sg may have SMT
- - * siblings and only one is busy. In such case, @dst_cpu
- - * can help if it has higher priority and is idle (i.e.,
- - * it has no running tasks).
- - */
- - return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
- - }
- -
- /*
- - * @dst_cpu has SMT siblings and are also idle.
- - *
- * If the difference in the number of busy CPUs is two or more, let
- * find_busiest_group() take care of it. We only care if @sg has
- * exactly one busy CPU. This covers SMT and non-SMT sched groups.
- --
- 2.39.2
- From a06f6c7fbf4e42b2e8ff963d7b3d963550cc8ea3 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 20:58:36 -0800
- Subject: [PATCH] sched/topology: Remove SHARED_CHILD from ASYM_PACKING
- Only x86 and Power7 use ASYM_PACKING. They use it differently.
- Power7 has cores of equal priority, but the SMT siblings of a core have
- different priorities. Parent scheduling domains do not need (nor have) the
- ASYM_PACKING flag. SHARED_CHILD is not needed. Using SHARED_PARENT would
- cause the topology debug code to complain.
- X86 has cores of different priority, but all the SMT siblings of the core
- have equal priority. It needs ASYM_PACKING at the MC level, but not at the
- SMT level (it also needs it at upper levels if they have scheduling groups
- of different priority). Removing ASYM_PACKING from the SMT domain causes
- the topology debug code to complain.
- Remove SHARED_CHILD for now. We still need a topology check that satisfies
- both architectures.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-kernel@vger.kernel.org
- Suggested-by: Valentin Schneider <vschneid@redhat.com>
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Tested-by: Zhang Rui <rui.zhang@intel.com>
- Patchset: intel-thread-director
- ---
- include/linux/sched/sd_flags.h | 5 +----
- 1 file changed, 1 insertion(+), 4 deletions(-)
- diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
- index 57bde66d95f7a..800238854ba54 100644
- --- a/include/linux/sched/sd_flags.h
- +++ b/include/linux/sched/sd_flags.h
- @@ -132,12 +132,9 @@ SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
- /*
- * Place busy tasks earlier in the domain
- *
- - * SHARED_CHILD: Usually set on the SMT level. Technically could be set further
- - * up, but currently assumed to be set from the base domain
- - * upwards (see update_top_cache_domain()).
- * NEEDS_GROUPS: Load balancing flag.
- */
- -SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
- +SD_FLAG(SD_ASYM_PACKING, SDF_NEEDS_GROUPS)
-
- /*
- * Prefer to place tasks in a sibling domain
- --
- 2.39.2
- From ae1ee00a1f2e7ea4ff86ad6f9fbce736960049f3 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 20:58:37 -0800
- Subject: [PATCH] x86/sched: Remove SD_ASYM_PACKING from the SMT domain flags
- There is no difference between any of the SMT siblings of a physical core.
- Do not do asym_packing load balancing at this level.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Tested-by: Zhang Rui <rui.zhang@intel.com>
- Patchset: intel-thread-director
- ---
- arch/x86/kernel/smpboot.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
- diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
- index 55cad72715d99..0213d066a9a96 100644
- --- a/arch/x86/kernel/smpboot.c
- +++ b/arch/x86/kernel/smpboot.c
- @@ -547,7 +547,7 @@ static int x86_core_flags(void)
- #ifdef CONFIG_SCHED_SMT
- static int x86_smt_flags(void)
- {
- - return cpu_smt_flags() | x86_sched_itmt_flags();
- + return cpu_smt_flags();
- }
- #endif
- #ifdef CONFIG_SCHED_CLUSTER
- --
- 2.39.2
- From 03868cd4806db1cfd95e78ddaa203000b8aad97f Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 20:58:38 -0800
- Subject: [PATCH] x86/sched/itmt: Give all SMT siblings of a core the same
- priority
- X86 does not have the SD_ASYM_PACKING flag in the SMT domain. The scheduler
- knows how to handle SMT and non-SMT cores of different priority. There is
- no reason for SMT siblings of a core to have different priorities.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-kernel@vger.kernel.org
- Reviewed-by: Len Brown <len.brown@intel.com>
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Tested-by: Zhang Rui <rui.zhang@intel.com>
- Patchset: intel-thread-director
- ---
- arch/x86/kernel/itmt.c | 23 +++++------------------
- 1 file changed, 5 insertions(+), 18 deletions(-)
- diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
- index 9ff480e94511b..6510883c5e817 100644
- --- a/arch/x86/kernel/itmt.c
- +++ b/arch/x86/kernel/itmt.c
- @@ -174,32 +174,19 @@ int arch_asym_cpu_priority(int cpu)
-
- /**
- * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
- - * @prio: Priority of cpu core
- - * @core_cpu: The cpu number associated with the core
- + * @prio: Priority of @cpu
- + * @cpu: The CPU number
- *
- * The pstate driver will find out the max boost frequency
- * and call this function to set a priority proportional
- - * to the max boost frequency. CPU with higher boost
- + * to the max boost frequency. CPUs with higher boost
- * frequency will receive higher priority.
- *
- * No need to rebuild sched domain after updating
- * the CPU priorities. The sched domains have no
- * dependency on CPU priorities.
- */
- -void sched_set_itmt_core_prio(int prio, int core_cpu)
- +void sched_set_itmt_core_prio(int prio, int cpu)
- {
- - int cpu, i = 1;
- -
- - for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
- - int smt_prio;
- -
- - /*
- - * Ensure that the siblings are moved to the end
- - * of the priority chain and only used when
- - * all other high priority cpus are out of capacity.
- - */
- - smt_prio = prio * smp_num_siblings / (i * i);
- - per_cpu(sched_core_priority, cpu) = smt_prio;
- - i++;
- - }
- + per_cpu(sched_core_priority, cpu) = prio;
- }
- --
- 2.39.2
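- For a sense of scale, the removed formula
- smt_prio = prio * smp_num_siblings / (i * i) staggered sibling priorities,
- while the new code assigns the core priority verbatim. A worked example with
- prio = 100 and two siblings (a self-contained sketch, not kernel code):
-
- #include <stdio.h>
-
- int main(void)
- {
- 	int prio = 100, smp_num_siblings = 2;
- 	int i;
-
- 	/* Old scheme: staggered priorities within one core. */
- 	for (i = 1; i <= smp_num_siblings; i++)
- 		printf("old sibling %d: %d\n", i, prio * smp_num_siblings / (i * i));
- 	/* Prints 200 and 50: the second sibling sinks to the end of the chain. */
-
- 	/* New scheme: both siblings simply get the core priority, 100. */
- 	for (i = 1; i <= smp_num_siblings; i++)
- 		printf("new sibling %d: %d\n", i, prio);
- 	return 0;
- }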
- From 10a86fa64a25c1156d1de468366708274cdbf6b8 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:42 -0800
- Subject: [PATCH] sched/task_struct: Introduce IPC classes of tasks
- On hybrid processors, the architecture differences between the types of
- CPUs lead to different instructions-per-cycle (IPC) on each type of CPU.
- IPCs may differ further by the type of instructions. Instructions can be
- grouped into classes of similar IPCs.
- Hence, tasks can be classified into groups based on the type of
- instructions they execute.
- Add a new member task_struct::ipcc to associate a particular task to
- an IPC class that depends on the instructions it executes.
- The scheduler may use the IPC class of a task and data about the
- performance among CPUs of a given IPC class to improve throughput. It
- may, for instance, place certain classes of tasks on CPUs of higher
- performance.
- The methods to determine the classification of a task and its relative
- IPC score are specific to each CPU architecture.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- include/linux/sched.h | 10 ++++++++++
- init/Kconfig | 12 ++++++++++++
- 2 files changed, 22 insertions(+)
- diff --git a/include/linux/sched.h b/include/linux/sched.h
- index 853d08f7562bd..f292942178850 100644
- --- a/include/linux/sched.h
- +++ b/include/linux/sched.h
- @@ -127,6 +127,8 @@ struct task_group;
- __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
- TASK_PARKED)
-
- +#define IPC_CLASS_UNCLASSIFIED 0
- +
- #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING)
-
- #define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0)
- @@ -1522,6 +1524,14 @@ struct task_struct {
- union rv_task_monitor rv[RV_PER_TASK_MONITORS];
- #endif
-
- +#ifdef CONFIG_IPC_CLASSES
- + /*
- + * A hardware-defined classification of task that reflects but is
- + * not identical to the number of instructions per cycle.
- + */
- + unsigned short ipcc;
- +#endif
- +
- /*
- * New fields for task_struct should be added above here, so that
- * they are included in the randomized portion of task_struct.
- diff --git a/init/Kconfig b/init/Kconfig
- index 44e90b28a30f1..24c5eec9d22e6 100644
- --- a/init/Kconfig
- +++ b/init/Kconfig
- @@ -867,6 +867,18 @@ config UCLAMP_BUCKETS_COUNT
-
- If in doubt, use the default value.
-
- +config IPC_CLASSES
- + bool "IPC classes of tasks"
- + depends on SMP
- + help
- + If selected, each task is assigned a classification value that
- + reflects the type of instructions that the task executes. This
- + classification reflects but is not equal to the number of
- + instructions retired per cycle.
- +
- + The scheduler uses the classification value to improve the placement
- + of tasks.
- +
- endmenu
-
- #
- --
- 2.39.2
- From 11597284e5e583ef060ff6ccc4a3aa619c672d26 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:43 -0800
- Subject: [PATCH] sched: Add interfaces for IPC classes
- Add the interfaces that architectures shall implement to convey the data
- to support IPC classes.
- arch_update_ipcc() updates the IPC classification of the current task as
- given by hardware.
- arch_get_ipcc_score() provides a performance score for a given IPC class
- when placed on a specific CPU. Higher scores indicate higher performance.
- When a driver or equivalent enablement code has configured the necessary
- hardware to support IPC classes, it should call sched_enable_ipc_classes()
- to notify the scheduler that it can start using IPC classes data.
- The number of classes and the score of each class of task are determined
- by hardware.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- include/linux/sched/topology.h | 6 ++++
- kernel/sched/sched.h | 66 ++++++++++++++++++++++++++++++++++
- kernel/sched/topology.c | 9 +++++
- 3 files changed, 81 insertions(+)
- diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
- index 816df6cc444e1..5b084d3c9ad12 100644
- --- a/include/linux/sched/topology.h
- +++ b/include/linux/sched/topology.h
- @@ -280,4 +280,10 @@ static inline int task_node(const struct task_struct *p)
- return cpu_to_node(task_cpu(p));
- }
-
- +#ifdef CONFIG_IPC_CLASSES
- +extern void sched_enable_ipc_classes(void);
- +#else
- +static inline void sched_enable_ipc_classes(void) { }
- +#endif
- +
- #endif /* _LINUX_SCHED_TOPOLOGY_H */
- diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
- index 771f8ddb70533..7ab65d3feaa16 100644
- --- a/kernel/sched/sched.h
- +++ b/kernel/sched/sched.h
- @@ -2526,6 +2526,72 @@ void arch_scale_freq_tick(void)
- }
- #endif
-
- +#ifdef CONFIG_IPC_CLASSES
- +DECLARE_STATIC_KEY_FALSE(sched_ipcc);
- +
- +static inline bool sched_ipcc_enabled(void)
- +{
- + return static_branch_unlikely(&sched_ipcc);
- +}
- +
- +#ifndef arch_update_ipcc
- +/**
- + * arch_update_ipcc() - Update the IPC class of the current task
- + * @curr: The current task
- + *
- + * Request that the IPC classification of @curr is updated.
- + *
- + * Returns: none
- + */
- +static __always_inline
- +void arch_update_ipcc(struct task_struct *curr)
- +{
- +}
- +#endif
- +
- +#ifndef arch_get_ipcc_score
- +
- +#define SCHED_IPCC_SCORE_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)
- +/**
- + * arch_get_ipcc_score() - Get the IPC score of a class of task
- + * @ipcc: The IPC class
- + * @cpu: A CPU number
- + *
- + * The IPC performance scores reflects (but it is not identical to) the number
- + * of instructions retired per cycle for a given IPC class. It is a linear and
- + * abstract metric. Higher scores reflect better performance.
- + *
- + * The IPC score can be normalized with respect to the class, i, with the
- + * highest IPC score on the CPU, c, with highest performance:
- + *
- + * IPC(i, c)
- + * ------------------------------------ * SCHED_IPCC_SCORE_SCALE
- + * max(IPC(i, c) : (i, c))
- + *
- + * Scheduling schemes that want to use the IPC score along with other
- + * normalized metrics for scheduling (e.g., CPU capacity) may need to normalize
- + * it.
- + *
- + * Other scheduling schemes (e.g., asym_packing) do not need normalization.
- + *
- + * Returns the performance score of an IPC class, @ipcc, when running on @cpu.
- + * Error when either @ipcc or @cpu are invalid.
- + */
- +static __always_inline
- +unsigned long arch_get_ipcc_score(unsigned short ipcc, int cpu)
- +{
- + return SCHED_IPCC_SCORE_SCALE;
- +}
- +#endif
- +#else /* CONFIG_IPC_CLASSES */
- +
- +#define arch_get_ipcc_score(ipcc, cpu) (-EINVAL)
- +#define arch_update_ipcc(curr)
- +
- +static inline bool sched_ipcc_enabled(void) { return false; }
- +
- +#endif /* CONFIG_IPC_CLASSES */
- +
- #ifndef arch_scale_freq_capacity
- /**
- * arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
- diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
- index 8739c2a5a54ea..60e03d15f58ca 100644
- --- a/kernel/sched/topology.c
- +++ b/kernel/sched/topology.c
- @@ -670,6 +670,15 @@ DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
- DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
- DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
-
- +#ifdef CONFIG_IPC_CLASSES
- +DEFINE_STATIC_KEY_FALSE(sched_ipcc);
- +
- +void sched_enable_ipc_classes(void)
- +{
- + static_branch_enable_cpuslocked(&sched_ipcc);
- +}
- +#endif
- +
- static void update_top_cache_domain(int cpu)
- {
- struct sched_domain_shared *sds = NULL;
- --
- 2.39.2
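- The normalization described in the arch_get_ipcc_score() kerneldoc can be
- made concrete with a toy score table. All numbers below are invented, and
- the helper is a stand-in, not the kernel interface:
-
- #include <stdio.h>
-
- #define TOY_IPCC_SCALE (1L << 10) /* stand-in for SCHED_IPCC_SCORE_SCALE */
-
- /* Invented raw IPC(i, c) samples: rows are classes, columns are CPU types. */
- static const long raw_ipc[2][2] = {
- 	{ 10, 15 }, /* class 0 on a small/big CPU */
- 	{ 12, 30 }, /* class 1 on a small/big CPU */
- };
-
- /* Toy score: normalize against the best (class, cpu) pair, here 30. */
- static long toy_ipcc_score(unsigned short ipcc, int cpu)
- {
- 	return raw_ipc[ipcc][cpu] * TOY_IPCC_SCALE / 30;
- }
-
- int main(void)
- {
- 	/* Class 1 gains far more from the big CPU than class 0 does. */
- 	printf("class0: small=%ld big=%ld\n",
- 	       toy_ipcc_score(0, 0), toy_ipcc_score(0, 1));
- 	printf("class1: small=%ld big=%ld\n",
- 	       toy_ipcc_score(1, 0), toy_ipcc_score(1, 1));
- 	return 0;
- }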
- From db0b0e36404f9b091b52d5c1798ca3e875bf3728 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:44 -0800
- Subject: [PATCH] sched/core: Initialize the IPC class of a new task
- New tasks shall start life as unclassified. They will be classified by
- hardware when they run.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/core.c | 3 +++
- 1 file changed, 3 insertions(+)
- diff --git a/kernel/sched/core.c b/kernel/sched/core.c
- index 2a4918a1faa9e..325b1d3cf7a82 100644
- --- a/kernel/sched/core.c
- +++ b/kernel/sched/core.c
- @@ -4424,6 +4424,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
- p->se.prev_sum_exec_runtime = 0;
- p->se.nr_migrations = 0;
- p->se.vruntime = 0;
- +#ifdef CONFIG_IPC_CLASSES
- + p->ipcc = IPC_CLASS_UNCLASSIFIED;
- +#endif
- INIT_LIST_HEAD(&p->se.group_node);
-
- #ifdef CONFIG_FAIR_GROUP_SCHED
- --
- 2.39.2
- From 8250fc7b2d160a0638603d7575b0516a0ff1340e Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:45 -0800
- Subject: [PATCH] sched/core: Add user_tick as argument to scheduler_tick()
- Differentiate between user and kernel ticks so that the scheduler updates
- the IPC class of the current task during the former.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- include/linux/sched.h | 2 +-
- kernel/sched/core.c | 2 +-
- kernel/time/timer.c | 2 +-
- 3 files changed, 3 insertions(+), 3 deletions(-)
- diff --git a/include/linux/sched.h b/include/linux/sched.h
- index f292942178850..4f96c3dd59d0b 100644
- --- a/include/linux/sched.h
- +++ b/include/linux/sched.h
- @@ -293,7 +293,7 @@ enum {
- TASK_COMM_LEN = 16,
- };
-
- -extern void scheduler_tick(void);
- +extern void scheduler_tick(bool user_tick);
-
- #define MAX_SCHEDULE_TIMEOUT LONG_MAX
-
- diff --git a/kernel/sched/core.c b/kernel/sched/core.c
- index 325b1d3cf7a82..b438fc79f868f 100644
- --- a/kernel/sched/core.c
- +++ b/kernel/sched/core.c
- @@ -5550,7 +5550,7 @@ static inline u64 cpu_resched_latency(struct rq *rq) { return 0; }
- * This function gets called by the timer code, with HZ frequency.
- * We call it with interrupts disabled.
- */
- -void scheduler_tick(void)
- +void scheduler_tick(bool user_tick)
- {
- int cpu = smp_processor_id();
- struct rq *rq = cpu_rq(cpu);
- diff --git a/kernel/time/timer.c b/kernel/time/timer.c
- index 63a8ce7177dd4..e15e24105891f 100644
- --- a/kernel/time/timer.c
- +++ b/kernel/time/timer.c
- @@ -2073,7 +2073,7 @@ void update_process_times(int user_tick)
- if (in_irq())
- irq_work_tick();
- #endif
- - scheduler_tick();
- + scheduler_tick(user_tick);
- if (IS_ENABLED(CONFIG_POSIX_TIMERS))
- run_posix_cpu_timers();
- }
- --
- 2.39.2
- From 7151037d127499dfdb328d84ffc2f435aa3471ce Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:46 -0800
- Subject: [PATCH] sched/core: Update the IPC class of the current task
- When supported, hardware monitors the instruction stream to classify the
- current task. Hence, at each user tick, we are ready to read the most
- recent classification result for the current task.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/core.c | 3 +++
- 1 file changed, 3 insertions(+)
- diff --git a/kernel/sched/core.c b/kernel/sched/core.c
- index b438fc79f868f..0ab39cc055c77 100644
- --- a/kernel/sched/core.c
- +++ b/kernel/sched/core.c
- @@ -5562,6 +5562,9 @@ void scheduler_tick(bool user_tick)
- if (housekeeping_cpu(cpu, HK_TYPE_TICK))
- arch_scale_freq_tick();
-
- + if (sched_ipcc_enabled() && user_tick)
- + arch_update_ipcc(curr);
- +
- sched_clock_tick();
-
- rq_lock(rq, &rf);
- --
- 2.39.2
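- A minimal user-space model of the gating added above: classification is
- sampled only on user ticks, so ticks that land in kernel code never
- update the task's IPC class. The tick pattern is hypothetical and the
- counter stands in for arch_update_ipcc():
- 
- #include <stdbool.h>
- #include <stdio.h>
- 
- static int samples;
- 
- static void scheduler_tick(bool user_tick)
- {
-         if (user_tick)
-                 samples++;      /* stands in for arch_update_ipcc(curr) */
- }
- 
- int main(void)
- {
-         /* true = tick landed in user code, false = in the kernel */
-         const bool ticks[] = { true, true, false, true, false, false, true };
-         unsigned int i, n = sizeof(ticks) / sizeof(ticks[0]);
- 
-         for (i = 0; i < n; i++)
-                 scheduler_tick(ticks[i]);
- 
-         printf("sampled classification on %d of %u ticks\n", samples, n);
-         return 0;
- }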
- From 7bd90996a0cfd74c641d808c8975ab8aa5796572 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:47 -0800
- Subject: [PATCH] sched/fair: Collect load-balancing stats for IPC classes
- When selecting a busiest scheduling group, the IPC class of the current
- task can be used to select between two scheduling groups of types
- asym_packing or fully_busy that are otherwise identical.
- Compute the IPC class performance score for a scheduling group. It
- is the sum of the scores of the current tasks of all the runqueues.
- Also, keep track of the class of the task with the lowest IPC class score
- in the scheduling group.
- These two metrics will be used during idle load balancing to compute the
- current and the prospective IPC class score of a scheduling group.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/fair.c | 61 +++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 61 insertions(+)
- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
- index e5079ee882ff8..a418164953c36 100644
- --- a/kernel/sched/fair.c
- +++ b/kernel/sched/fair.c
- @@ -8767,6 +8767,11 @@ struct sg_lb_stats {
- unsigned int nr_numa_running;
- unsigned int nr_preferred_running;
- #endif
- +#ifdef CONFIG_IPC_CLASSES
- + unsigned long min_score; /* Min(score(rq->curr->ipcc)) */
- + unsigned short min_ipcc; /* Class of the task with the minimum IPCC score in the rq */
- + unsigned long sum_score; /* Sum(score(rq->curr->ipcc)) */
- +#endif
- };
-
- /*
- @@ -9110,6 +9115,59 @@ group_type group_classify(unsigned int imbalance_pct,
- return group_has_spare;
- }
-
- +#ifdef CONFIG_IPC_CLASSES
- +static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
- +{
- + /* All IPCC stats have been set to zero in update_sg_lb_stats(). */
- + sgs->min_score = ULONG_MAX;
- +}
- +
- +/* Called only if cpu_of(@rq) is not idle and has tasks running. */
- +static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
- + struct rq *rq)
- +{
- + struct task_struct *curr;
- + unsigned short ipcc;
- + unsigned long score;
- +
- + if (!sched_ipcc_enabled())
- + return;
- +
- + curr = rcu_dereference(rq->curr);
- + if (!curr || (curr->flags & PF_EXITING) || is_idle_task(curr) ||
- + task_is_realtime(curr) ||
- + !cpumask_test_cpu(dst_cpu, curr->cpus_ptr))
- + return;
- +
- + ipcc = curr->ipcc;
- + score = arch_get_ipcc_score(ipcc, cpu_of(rq));
- +
- + /*
- + * Ignore tasks with invalid scores. When finding the busiest group, we
- + * prefer those with higher sum_score. This group will not be selected.
- + */
- + if (IS_ERR_VALUE(score))
- + return;
- +
- + sgs->sum_score += score;
- +
- + if (score < sgs->min_score) {
- + sgs->min_score = score;
- + sgs->min_ipcc = ipcc;
- + }
- +}
- +
- +#else /* CONFIG_IPC_CLASSES */
- +static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
- + struct rq *rq)
- +{
- +}
- +
- +static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
- +{
- +}
- +#endif /* CONFIG_IPC_CLASSES */
- +
- /**
- * asym_smt_can_pull_tasks - Check whether the load balancing CPU can pull tasks
- * @dst_cpu: Destination CPU of the load balancing
- @@ -9202,6 +9260,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
- int i, nr_running, local_group;
-
- memset(sgs, 0, sizeof(*sgs));
- + init_rq_ipcc_stats(sgs);
-
- local_group = group == sds->local;
-
- @@ -9251,6 +9310,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
- if (sgs->group_misfit_task_load < load)
- sgs->group_misfit_task_load = load;
- }
- +
- + update_sg_lb_ipcc_stats(env->dst_cpu, sgs, rq);
- }
-
- sgs->group_capacity = group->sgc->capacity;
- --
- 2.39.2
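- A minimal user-space model of the bookkeeping that
- update_sg_lb_ipcc_stats() performs for one scheduling group. The
- per-runqueue scores are hypothetical; in the kernel they come from
- arch_get_ipcc_score():
- 
- #include <limits.h>
- #include <stdio.h>
- 
- int main(void)
- {
-         /* IPCC score of rq->curr on each busy runqueue of the group */
-         const unsigned long score[] = { 380, 220, 450 };
-         unsigned long sum_score = 0, min_score = ULONG_MAX;
-         unsigned int i;
- 
-         for (i = 0; i < 3; i++) {
-                 sum_score += score[i];
-                 if (score[i] < min_score)
-                         min_score = score[i];
-         }
- 
-         /* sum_score=1050 min_score=220 */
-         printf("sum_score=%lu min_score=%lu\n", sum_score, min_score);
-         return 0;
- }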
- From dcdc8c47500008e304dab90c7546127c8a056752 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:48 -0800
- Subject: [PATCH] sched/fair: Compute IPC class scores for load balancing
- Compute the joint total (both current and prospective) IPC class score of
- a scheduling group and the local scheduling group.
- These IPCC statistics are used during idle load balancing. The candidate
- scheduling group will have one fewer busy CPU after load balancing. This
- observation is important for cores with SMT support.
- The IPCC score of scheduling groups composed of SMT siblings needs to
- consider that the siblings share CPU resources. When computing the total
- IPCC score of the scheduling group, divide the score of each sibling by
- the number of busy siblings, as the worked example after this patch
- illustrates.
- Collect IPCC statistics for asym_packing and fully_busy scheduling groups.
- When picking a busiest group, they are used to break ties between otherwise
- identical groups.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/fair.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 68 insertions(+)
- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
- index a418164953c36..ae0c908be707e 100644
- --- a/kernel/sched/fair.c
- +++ b/kernel/sched/fair.c
- @@ -8771,6 +8771,8 @@ struct sg_lb_stats {
- unsigned long min_score; /* Min(score(rq->curr->ipcc)) */
- unsigned short min_ipcc; /* Class of the task with the minimum IPCC score in the rq */
- unsigned long sum_score; /* Sum(score(rq->curr->ipcc)) */
- + long ipcc_score_after; /* Prospective IPCC score after load balancing */
- + unsigned long ipcc_score_before; /* IPCC score before load balancing */
- #endif
- };
-
- @@ -9157,6 +9159,62 @@ static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
- }
- }
-
- +static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
- + struct sched_group *sg,
- + struct lb_env *env)
- +{
- + unsigned long score_on_dst_cpu, before;
- + int busy_cpus;
- + long after;
- +
- + if (!sched_ipcc_enabled())
- + return;
- +
- + /*
- + * IPCC scores are only useful during idle load balancing. For now,
- + * only asym_packing uses IPCC scores.
- + */
- + if (!(env->sd->flags & SD_ASYM_PACKING) ||
- + env->idle == CPU_NOT_IDLE)
- + return;
- +
- + /*
- + * IPCC scores are used to break ties only between these types of
- + * groups.
- + */
- + if (sgs->group_type != group_fully_busy &&
- + sgs->group_type != group_asym_packing)
- + return;
- +
- + busy_cpus = sgs->group_weight - sgs->idle_cpus;
- +
- + /* No busy CPUs in the group. No tasks to move. */
- + if (!busy_cpus)
- + return;
- +
- + score_on_dst_cpu = arch_get_ipcc_score(sgs->min_ipcc, env->dst_cpu);
- +
- + /*
- + * Do not use IPC scores. sgs::ipcc_score_{after, before} will be zero
- + * and not used.
- + */
- + if (IS_ERR_VALUE(score_on_dst_cpu))
- + return;
- +
- + before = sgs->sum_score;
- + after = before - sgs->min_score;
- +
- + /* SMT siblings share throughput. */
- + if (busy_cpus > 1 && sg->flags & SD_SHARE_CPUCAPACITY) {
- + before /= busy_cpus;
- + /* One sibling will become idle after load balance. */
- + after /= busy_cpus - 1;
- + }
- +
- + sgs->ipcc_score_after = after + score_on_dst_cpu;
- + sgs->ipcc_score_before = before;
- +}
- +
- #else /* CONFIG_IPC_CLASSES */
- static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
- struct rq *rq)
- @@ -9166,6 +9224,13 @@ static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
- static void init_rq_ipcc_stats(struct sg_lb_stats *sgs)
- {
- }
- +
- +static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
- + struct sched_group *sg,
- + struct lb_env *env)
- +{
- +}
- +
- #endif /* CONFIG_IPC_CLASSES */
-
- /**
- @@ -9327,6 +9392,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
-
- sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs);
-
- + if (!local_group)
- + update_sg_lb_stats_scores(sgs, group, env);
- +
- /* Computing avg_load makes sense only when group is overloaded */
- if (sgs->group_type == group_overloaded)
- sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) /
- --
- 2.39.2
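- The worked example referenced above, for an SMT group with two busy
- siblings. All numbers are hypothetical; they only trace the arithmetic
- of update_sg_lb_stats_scores():
- 
- #include <stdio.h>
- 
- int main(void)
- {
-         unsigned long sum_score = 1200, min_score = 500;
-         unsigned long score_on_dst_cpu = 520;   /* min_ipcc's score on dst_cpu */
-         unsigned int busy_cpus = 2;
-         unsigned long before = sum_score;
-         long after = before - min_score;
- 
-         /* Siblings share throughput: scale by the number of busy
-          * siblings, one of which becomes idle after the balance. */
-         before /= busy_cpus;            /* 1200 / 2 = 600 */
-         after /= busy_cpus - 1;         /* (1200 - 500) / 1 = 700 */
- 
-         /* ipcc_score_before=600 ipcc_score_after=1220 */
-         printf("before=%lu after=%ld\n", before,
-                after + (long)score_on_dst_cpu);
-         return 0;
- }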
- From f5899b589a3df28df698309c8529262012cbfcbc Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:49 -0800
- Subject: [PATCH] sched/fair: Use IPCC stats to break ties between asym_packing
- sched groups
- As it iterates, update_sd_pick_busiest() keeps on selecting, as busiest,
- sched groups of identical priority. Since both groups have the same
- priority, either group is a good choice. The IPCC statistics provide a
- measure of the throughput before and after load balance. Use them to
- pick a busiest scheduling group from otherwise identical asym_packing
- scheduling groups.
- Pick as busiest the scheduling group that yields a higher IPCC score
- after load balancing.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/fair.c | 72 +++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 72 insertions(+)
- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
- index ae0c908be707e..cffb435e2b1c4 100644
- --- a/kernel/sched/fair.c
- +++ b/kernel/sched/fair.c
- @@ -9215,6 +9215,60 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
- sgs->ipcc_score_before = before;
- }
-
- +/**
- + * sched_asym_ipcc_prefer - Select a sched group based on its IPCC score
- + * @a: Load balancing statistics of a sched group
- + * @b: Load balancing statistics of a second sched group
- + *
- + * Returns: true if @a has a higher IPCC score than @b after load balance.
- + * False otherwise.
- + */
- +static bool sched_asym_ipcc_prefer(struct sg_lb_stats *a,
- + struct sg_lb_stats *b)
- +{
- + if (!sched_ipcc_enabled())
- + return false;
- +
- + /* @a increases overall throughput after load balance. */
- + if (a->ipcc_score_after > b->ipcc_score_after)
- + return true;
- +
- + /*
- + * If @a and @b yield the same overall throughput, pick @a if
- + * its current throughput is lower than that of @b.
- + */
- + if (a->ipcc_score_after == b->ipcc_score_after)
- + return a->ipcc_score_before < b->ipcc_score_before;
- +
- + return false;
- +}
- +
- +/**
- + * sched_asym_ipcc_pick - Select a sched group based on its IPCC score
- + * @a: A scheduling group
- + * @b: A second scheduling group
- + * @a_stats: Load balancing statistics of @a
- + * @b_stats: Load balancing statistics of @b
- + *
- + * Returns: true if @a has the same priority and @a has tasks with IPC classes
- + * that yield higher overall throughput after load balance. False otherwise.
- + */
- +static bool sched_asym_ipcc_pick(struct sched_group *a,
- + struct sched_group *b,
- + struct sg_lb_stats *a_stats,
- + struct sg_lb_stats *b_stats)
- +{
- + /*
- + * Only use the class-specific preference selection if both sched
- + * groups have the same priority.
- + */
- + if (arch_asym_cpu_priority(a->asym_prefer_cpu) !=
- + arch_asym_cpu_priority(b->asym_prefer_cpu))
- + return false;
- +
- + return sched_asym_ipcc_prefer(a_stats, b_stats);
- +}
- +
- #else /* CONFIG_IPC_CLASSES */
- static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
- struct rq *rq)
- @@ -9231,6 +9285,14 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
- {
- }
-
- +static bool sched_asym_ipcc_pick(struct sched_group *a,
- + struct sched_group *b,
- + struct sg_lb_stats *a_stats,
- + struct sg_lb_stats *b_stats)
- +{
- + return false;
- +}
- +
- #endif /* CONFIG_IPC_CLASSES */
-
- /**
- @@ -9466,6 +9528,16 @@ static bool update_sd_pick_busiest(struct lb_env *env,
- /* Prefer to move from lowest priority CPU's work */
- if (sched_asym_prefer(sg->asym_prefer_cpu, sds->busiest->asym_prefer_cpu))
- return false;
- +
- + /*
- + * Unlike other callers of sched_asym_prefer(), here both @sg
- + * and @sds::busiest have tasks running. When they have equal
- + * priority, their IPC class scores can be used to select a
- + * better busiest.
- + */
- + if (sched_asym_ipcc_pick(sds->busiest, sg, &sds->busiest_stat, sgs))
- + return false;
- +
- break;
-
- case group_misfit_task:
- --
- 2.39.2
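- A minimal model of the tie-break logic above, with hypothetical stats:
- between two equal-priority groups, prefer the one that yields the higher
- prospective score, and on a further tie the one that is slower today:
- 
- #include <stdbool.h>
- #include <stdio.h>
- 
- struct ipcc_stats {
-         long score_after;               /* prospective throughput */
-         unsigned long score_before;     /* current throughput */
- };
- 
- static bool ipcc_prefer(const struct ipcc_stats *a,
-                         const struct ipcc_stats *b)
- {
-         if (a->score_after > b->score_after)
-                 return true;
-         if (a->score_after == b->score_after)
-                 return a->score_before < b->score_before;
-         return false;
- }
- 
- int main(void)
- {
-         struct ipcc_stats busiest = { .score_after = 1220, .score_before = 600 };
-         struct ipcc_stats sg = { .score_after = 1180, .score_before = 650 };
- 
-         /* The candidate does not displace the current busiest group. */
-         printf("replace busiest? %s\n",
-                ipcc_prefer(&sg, &busiest) ? "yes" : "no");
-         return 0;
- }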
- From 516bec260bf73b1f5c078755b96593849fd166d3 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:50 -0800
- Subject: [PATCH] sched/fair: Use IPCC stats to break ties between fully_busy
- SMT groups
- IPCC statistics are used during idle load balancing. After balancing,
- one of the siblings of an SMT core will become idle and the rest of the
- busy siblings will enjoy increased throughput. The IPCC statistics provide
- a measure of the increased throughput. Use them to pick a busiest group
- from otherwise identical fully_busy scheduling groups (whose avg_load is
- equal, and zero).
- Using IPCC scores to break ties with non-SMT fully_busy sched groups
- is not necessary. SMT sched groups always need more help.
- Add a stub sched_asym_ipcc_prefer() for !CONFIG_IPC_CLASSES.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/fair.c | 23 ++++++++++++++++++++---
- 1 file changed, 20 insertions(+), 3 deletions(-)
- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
- index cffb435e2b1c4..0996339df429b 100644
- --- a/kernel/sched/fair.c
- +++ b/kernel/sched/fair.c
- @@ -9285,6 +9285,12 @@ static void update_sg_lb_stats_scores(struct sg_lb_stats *sgs,
- {
- }
-
- +static bool sched_asym_ipcc_prefer(struct sg_lb_stats *a,
- + struct sg_lb_stats *b)
- +{
- + return false;
- +}
- +
- static bool sched_asym_ipcc_pick(struct sched_group *a,
- struct sched_group *b,
- struct sg_lb_stats *a_stats,
- @@ -9568,10 +9574,21 @@ static bool update_sd_pick_busiest(struct lb_env *env,
- if (sgs->avg_load == busiest->avg_load) {
- /*
- * SMT sched groups need more help than non-SMT groups.
- - * If @sg happens to also be SMT, either choice is good.
- */
- - if (sds->busiest->flags & SD_SHARE_CPUCAPACITY)
- - return false;
- + if (sds->busiest->flags & SD_SHARE_CPUCAPACITY) {
- + if (!(sg->flags & SD_SHARE_CPUCAPACITY))
- + return false;
- +
- + /*
- + * Between two SMT groups, use IPCC scores to pick the
- + * one that would improve throughput the most (only
- + * asym_packing uses IPCC scores for now).
- + */
- + if (sched_ipcc_enabled() &&
- + env->sd->flags & SD_ASYM_PACKING &&
- + sched_asym_ipcc_prefer(busiest, sgs))
- + return false;
- + }
- }
-
- break;
- --
- 2.39.2
- From 442df79e3613c6db2f01a8489177d0edd366309d Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:51 -0800
- Subject: [PATCH] sched/fair: Use IPCC scores to select a busiest runqueue
- For two runqueues of equal priority and equal number of running tasks,
- select the one whose current task would have the highest IPC class score
- if placed on the destination CPU.
- For now, use IPCC scores only for scheduling domains with the
- SD_ASYM_PACKING flag.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/fair.c | 64 +++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 64 insertions(+)
- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
- index 0996339df429b..a9a105092e7c3 100644
- --- a/kernel/sched/fair.c
- +++ b/kernel/sched/fair.c
- @@ -9269,6 +9269,37 @@ static bool sched_asym_ipcc_pick(struct sched_group *a,
- return sched_asym_ipcc_prefer(a_stats, b_stats);
- }
-
- +/**
- + * ipcc_score_delta - Get the IPCC score delta wrt the load balance's dst_cpu
- + * @p: A task
- + * @env: Load balancing environment
- + *
- + * Returns: The IPCC score delta that @p would get if placed in the destination
- + * CPU of @env. LONG_MIN to indicate that the delta should not be used.
- + */
- +static long ipcc_score_delta(struct task_struct *p, struct lb_env *env)
- +{
- + unsigned long score_src, score_dst;
- + unsigned short ipcc = p->ipcc;
- +
- + if (!sched_ipcc_enabled())
- + return LONG_MIN;
- +
- + /* Only asym_packing uses IPCC scores at the moment. */
- + if (!(env->sd->flags & SD_ASYM_PACKING))
- + return LONG_MIN;
- +
- + score_dst = arch_get_ipcc_score(ipcc, env->dst_cpu);
- + if (IS_ERR_VALUE(score_dst))
- + return LONG_MIN;
- +
- + score_src = arch_get_ipcc_score(ipcc, task_cpu(p));
- + if (IS_ERR_VALUE(score_src))
- + return LONG_MIN;
- +
- + return score_dst - score_src;
- +}
- +
- #else /* CONFIG_IPC_CLASSES */
- static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
- struct rq *rq)
- @@ -9299,6 +9330,11 @@ static bool sched_asym_ipcc_pick(struct sched_group *a,
- return false;
- }
-
- +static long ipcc_score_delta(struct task_struct *p, struct lb_env *env)
- +{
- + return LONG_MIN;
- +}
- +
- #endif /* CONFIG_IPC_CLASSES */
-
- /**
- @@ -10459,6 +10495,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
- {
- struct rq *busiest = NULL, *rq;
- unsigned long busiest_util = 0, busiest_load = 0, busiest_capacity = 1;
- + long busiest_ipcc_delta = LONG_MIN;
- unsigned int busiest_nr = 0;
- int i;
-
- @@ -10575,8 +10612,35 @@ static struct rq *find_busiest_queue(struct lb_env *env,
-
- case migrate_task:
- if (busiest_nr < nr_running) {
- + struct task_struct *curr;
- +
- busiest_nr = nr_running;
- busiest = rq;
- +
- + /*
- + * Remember the IPCC score delta of busiest::curr.
- + * We may need it to break a tie with other queues
- + * with equal nr_running.
- + */
- + curr = rcu_dereference(busiest->curr);
- + busiest_ipcc_delta = ipcc_score_delta(curr, env);
- + /*
- + * If rq and busiest have the same number of running
- + * tasks and IPC classes are supported, pick rq if doing
- + * so would give rq::curr a bigger IPC boost on dst_cpu.
- + */
- + } else if (busiest_nr == nr_running) {
- + struct task_struct *curr;
- + long delta;
- +
- + curr = rcu_dereference(rq->curr);
- + delta = ipcc_score_delta(curr, env);
- +
- + if (busiest_ipcc_delta < delta) {
- + busiest_ipcc_delta = delta;
- + busiest_nr = nr_running;
- + busiest = rq;
- + }
- }
- break;
-
- --
- 2.39.2
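- A minimal model of the migrate_task tie-break above: among runqueues
- with equal nr_running, keep the one whose current task would gain the
- most IPCC score on dst_cpu. The deltas are hypothetical; the kernel
- derives them with ipcc_score_delta():
- 
- #include <limits.h>
- #include <stdio.h>
- 
- int main(void)
- {
-         /* score(dst_cpu) - score(task_cpu) for rq->curr of each candidate */
-         const long delta[] = { 40, -10, 75 };
-         long busiest_ipcc_delta = LONG_MIN;
-         int i, busiest = -1;
- 
-         for (i = 0; i < 3; i++) {
-                 if (busiest_ipcc_delta < delta[i]) {
-                         busiest_ipcc_delta = delta[i];
-                         busiest = i;
-                 }
-         }
- 
-         /* busiest rq: 2 (delta 75) */
-         printf("busiest rq: %d (delta %ld)\n", busiest, busiest_ipcc_delta);
-         return 0;
- }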
- From fa944aa2c7b296272c55a201a3aa40a84f9737a5 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:52 -0800
- Subject: [PATCH] thermal: intel: hfi: Introduce Intel Thread Director classes
- On Intel hybrid parts, each type of CPU has specific performance and
- energy efficiency capabilities. The Intel Thread Director technology
- extends the Hardware Feedback Interface (HFI) to provide performance and
- energy efficiency data for advanced classes of instructions.
- Add support to parse per-class capabilities.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- drivers/thermal/intel/intel_hfi.c | 30 ++++++++++++++++++++++++------
- 1 file changed, 24 insertions(+), 6 deletions(-)
- diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
- index 6e604bda2b939..2527ae3836c74 100644
- --- a/drivers/thermal/intel/intel_hfi.c
- +++ b/drivers/thermal/intel/intel_hfi.c
- @@ -77,7 +77,7 @@ union cpuid6_edx {
- * @ee_cap: Energy efficiency capability
- *
- * Capabilities of a logical processor in the HFI table. These capabilities are
- - * unitless.
- + * unitless and specific to each HFI class.
- */
- struct hfi_cpu_data {
- u8 perf_cap;
- @@ -89,7 +89,8 @@ struct hfi_cpu_data {
- * @perf_updated: Hardware updated performance capabilities
- * @ee_updated: Hardware updated energy efficiency capabilities
- *
- - * Properties of the data in an HFI table.
- + * Properties of the data in an HFI table. There exists one header per each
- + * HFI class.
- */
- struct hfi_hdr {
- u8 perf_updated;
- @@ -127,16 +128,21 @@ struct hfi_instance {
-
- /**
- * struct hfi_features - Supported HFI features
- + * @nr_classes: Number of classes supported
- * @nr_table_pages: Size of the HFI table in 4KB pages
- * @cpu_stride: Stride size to locate the capability data of a logical
- * processor within the table (i.e., row stride)
- + * @class_stride: Stride size to locate a class within the capability
- + * data of a logical processor or the HFI table header
- * @hdr_size: Size of the table header
- *
- * Parameters and supported features that are common to all HFI instances
- */
- struct hfi_features {
- + unsigned int nr_classes;
- size_t nr_table_pages;
- unsigned int cpu_stride;
- + unsigned int class_stride;
- unsigned int hdr_size;
- };
-
- @@ -333,8 +339,8 @@ static void init_hfi_cpu_index(struct hfi_cpu_info *info)
- }
-
- /*
- - * The format of the HFI table depends on the number of capabilities that the
- - * hardware supports. Keep a data structure to navigate the table.
- + * The format of the HFI table depends on the number of capabilities and classes
- + * that the hardware supports. Keep a data structure to navigate the table.
- */
- static void init_hfi_instance(struct hfi_instance *hfi_instance)
- {
- @@ -515,18 +521,30 @@ static __init int hfi_parse_features(void)
- /* The number of 4KB pages required by the table */
- hfi_features.nr_table_pages = edx.split.table_pages + 1;
-
- + /*
- + * Capability fields of an HFI class are grouped together. Classes are
- + * contiguous in memory. Hence, use the number of supported features to
- + * locate a specific class.
- + */
- + hfi_features.class_stride = nr_capabilities;
- +
- + /* For now, use only one class of the HFI table */
- + hfi_features.nr_classes = 1;
- +
- /*
- * The header contains change indications for each supported feature.
- * The size of the table header is rounded up to be a multiple of 8
- * bytes.
- */
- - hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
- + hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities *
- + hfi_features.nr_classes, 8) * 8;
-
- /*
- * Data of each logical processor is also rounded up to be a multiple
- * of 8 bytes.
- */
- - hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
- + hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities *
- + hfi_features.nr_classes, 8) * 8;
-
- return 0;
- }
- --
- 2.39.2
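- A minimal model of the stride math in hfi_parse_features() above. It
- assumes two one-byte capabilities (perf_cap and ee_cap, as in struct
- hfi_cpu_data) and a hypothetical four-class table; this patch itself
- still pins nr_classes to 1:
- 
- #include <stdio.h>
- 
- #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
- 
- int main(void)
- {
-         unsigned int nr_capabilities = 2;       /* perf_cap + ee_cap */
-         unsigned int nr_classes = 4;            /* hypothetical */
-         unsigned int class_stride, hdr_size, cpu_stride;
- 
-         /* Classes are contiguous within a CPU's row of the table. */
-         class_stride = nr_capabilities;
- 
-         /* Header and per-CPU rows cover all classes, rounded up to a
-          * multiple of 8 bytes. */
-         hdr_size = DIV_ROUND_UP(nr_capabilities * nr_classes, 8) * 8;
-         cpu_stride = DIV_ROUND_UP(nr_capabilities * nr_classes, 8) * 8;
- 
-         /* class_stride=2 hdr_size=8 cpu_stride=8 */
-         printf("class_stride=%u hdr_size=%u cpu_stride=%u\n",
-                class_stride, hdr_size, cpu_stride);
-         return 0;
- }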
- From 61b13cb56dcd43bfa7ef1a94ae93fb4f9d45b7dc Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:53 -0800
- Subject: [PATCH] x86/cpufeatures: Add the Intel Thread Director feature
- definitions
- Intel Thread Director (ITD) provides hardware resources to classify
- the current task. The classification reflects the type of instructions that
- a task currently executes.
- ITD extends the Hardware Feedback Interface table to provide performance
- and energy efficiency capabilities for each of the supported classes of
- tasks.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- arch/x86/include/asm/cpufeatures.h | 1 +
- arch/x86/include/asm/disabled-features.h | 8 +++++++-
- arch/x86/kernel/cpu/cpuid-deps.c | 1 +
- 3 files changed, 9 insertions(+), 1 deletion(-)
- diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
- index 8f39c46197b82..a2f2730737aeb 100644
- --- a/arch/x86/include/asm/cpufeatures.h
- +++ b/arch/x86/include/asm/cpufeatures.h
- @@ -345,6 +345,7 @@
- #define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
- #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
- #define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */
- +#define X86_FEATURE_ITD (14*32+23) /* Intel Thread Director */
-
- /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
- #define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
- diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
- index c44b56f7ffba0..0edd9bef7f2ed 100644
- --- a/arch/x86/include/asm/disabled-features.h
- +++ b/arch/x86/include/asm/disabled-features.h
- @@ -99,6 +99,12 @@
- # define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31))
- #endif
-
- +#ifdef CONFIG_IPC_CLASSES
- +# define DISABLE_ITD 0
- +#else
- +# define DISABLE_ITD (1 << (X86_FEATURE_ITD & 31))
- +#endif
- +
- /*
- * Make sure to add features to the correct mask
- */
- @@ -117,7 +123,7 @@
- DISABLE_CALL_DEPTH_TRACKING)
- #define DISABLED_MASK12 0
- #define DISABLED_MASK13 0
- -#define DISABLED_MASK14 0
- +#define DISABLED_MASK14 (DISABLE_ITD)
- #define DISABLED_MASK15 0
- #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
- DISABLE_ENQCMD)
- diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
- index d952211171292..277f157e067e5 100644
- --- a/arch/x86/kernel/cpu/cpuid-deps.c
- +++ b/arch/x86/kernel/cpu/cpuid-deps.c
- @@ -79,6 +79,7 @@ static const struct cpuid_dep cpuid_deps[] = {
- { X86_FEATURE_XFD, X86_FEATURE_XSAVES },
- { X86_FEATURE_XFD, X86_FEATURE_XGETBV1 },
- { X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
- + { X86_FEATURE_ITD, X86_FEATURE_HFI },
- {}
- };
-
- --
- 2.39.2
- From b32f2ed414ebd4bef042aa2529acdefbad0352a2 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:54 -0800
- Subject: [PATCH] thermal: intel: hfi: Store per-CPU IPCC scores
- The scheduler reads the IPCC scores when balancing load. These reads can
- be quite frequent. Hardware can also update the HFI table frequently.
- Concurrent access may cause a lot of lock contention. It gets worse as the
- number of CPUs increases.
- Instead, create separate per-CPU IPCC scores that the scheduler can read
- without the HFI table lock.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- drivers/thermal/intel/intel_hfi.c | 46 +++++++++++++++++++++++++++++++
- 1 file changed, 46 insertions(+)
- diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
- index 2527ae3836c74..b06021828892c 100644
- --- a/drivers/thermal/intel/intel_hfi.c
- +++ b/drivers/thermal/intel/intel_hfi.c
- @@ -29,6 +29,7 @@
- #include <linux/kernel.h>
- #include <linux/math.h>
- #include <linux/mutex.h>
- +#include <linux/percpu.h>
- #include <linux/percpu-defs.h>
- #include <linux/printk.h>
- #include <linux/processor.h>
- @@ -170,6 +171,43 @@ static struct workqueue_struct *hfi_updates_wq;
- #define HFI_UPDATE_INTERVAL HZ
- #define HFI_MAX_THERM_NOTIFY_COUNT 16
-
- +#ifdef CONFIG_IPC_CLASSES
- +static int __percpu *hfi_ipcc_scores;
- +
- +static int alloc_hfi_ipcc_scores(void)
- +{
- + if (!cpu_feature_enabled(X86_FEATURE_ITD))
- + return 0;
- +
- + hfi_ipcc_scores = __alloc_percpu(sizeof(*hfi_ipcc_scores) *
- + hfi_features.nr_classes,
- + sizeof(*hfi_ipcc_scores));
- +
- + return !hfi_ipcc_scores;
- +}
- +
- +static void set_hfi_ipcc_score(void *caps, int cpu)
- +{
- + int i, *hfi_class;
- +
- + if (!cpu_feature_enabled(X86_FEATURE_ITD))
- + return;
- +
- + hfi_class = per_cpu_ptr(hfi_ipcc_scores, cpu);
- +
- + for (i = 0; i < hfi_features.nr_classes; i++) {
- + struct hfi_cpu_data *class_caps;
- +
- + class_caps = caps + i * hfi_features.class_stride;
- + WRITE_ONCE(hfi_class[i], class_caps->perf_cap);
- + }
- +}
- +
- +#else
- +static int alloc_hfi_ipcc_scores(void) { return 0; }
- +static void set_hfi_ipcc_score(void *caps, int cpu) { }
- +#endif /* CONFIG_IPC_CLASSES */
- +
- static void get_hfi_caps(struct hfi_instance *hfi_instance,
- struct thermal_genl_cpu_caps *cpu_caps)
- {
- @@ -192,6 +230,8 @@ static void get_hfi_caps(struct hfi_instance *hfi_instance,
- cpu_caps[i].efficiency = caps->ee_cap << 2;
-
- ++i;
- +
- + set_hfi_ipcc_score(caps, cpu);
- }
- raw_spin_unlock_irq(&hfi_instance->table_lock);
- }
- @@ -580,8 +620,14 @@ void __init intel_hfi_init(void)
- if (!hfi_updates_wq)
- goto err_nomem;
-
- + if (alloc_hfi_ipcc_scores())
- + goto err_ipcc;
- +
- return;
-
- +err_ipcc:
- + destroy_workqueue(hfi_updates_wq);
- +
- err_nomem:
- for (j = 0; j < i; ++j) {
- hfi_instance = &hfi_instances[j];
- --
- 2.39.2
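- The WRITE_ONCE() store above pairs with a lock-free READ_ONCE() load on
- the scheduler side (added in a later patch). A minimal model of the
- idiom, with the kernel macros reduced to volatile accesses:
- 
- #include <stdio.h>
- 
- #define WRITE_ONCE(x, val) (*(volatile typeof(x) *)&(x) = (val))
- #define READ_ONCE(x)       (*(volatile typeof(x) *)&(x))
- 
- /* Stands in for one CPU's slots of the per-CPU hfi_ipcc_scores array. */
- static int ipcc_scores[4];
- 
- int main(void)
- {
-         WRITE_ONCE(ipcc_scores[1], 220);        /* HFI update side */
- 
-         /* Scheduler side: no HFI table lock is taken for this read. */
-         printf("class 1 score: %d\n", READ_ONCE(ipcc_scores[1]));
-         return 0;
- }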
- From 9b519ff89b08af84eb947598643a71fddcc6a263 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:55 -0800
- Subject: [PATCH] thermal: intel: hfi: Update the IPC class of the current task
- Use Intel Thread Director classification to update the IPC class of a
- task. Implement the arch_update_ipcc() interface of the scheduler.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- arch/x86/include/asm/topology.h | 6 ++++++
- drivers/thermal/intel/intel_hfi.c | 32 +++++++++++++++++++++++++++++++
- 2 files changed, 38 insertions(+)
- diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
- index 458c891a82736..ffcdac3f398f0 100644
- --- a/arch/x86/include/asm/topology.h
- +++ b/arch/x86/include/asm/topology.h
- @@ -227,4 +227,10 @@ void init_freq_invariance_cppc(void);
- #define arch_init_invariance_cppc init_freq_invariance_cppc
- #endif
-
- +#if defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL)
- +void intel_hfi_update_ipcc(struct task_struct *curr);
- +
- +#define arch_update_ipcc intel_hfi_update_ipcc
- +#endif /* defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL) */
- +
- #endif /* _ASM_X86_TOPOLOGY_H */
- diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
- index b06021828892c..530dcf57e06e2 100644
- --- a/drivers/thermal/intel/intel_hfi.c
- +++ b/drivers/thermal/intel/intel_hfi.c
- @@ -72,6 +72,17 @@ union cpuid6_edx {
- u32 full;
- };
-
- +#ifdef CONFIG_IPC_CLASSES
- +union hfi_thread_feedback_char_msr {
- + struct {
- + u64 classid : 8;
- + u64 __reserved : 55;
- + u64 valid : 1;
- + } split;
- + u64 full;
- +};
- +#endif
- +
- /**
- * struct hfi_cpu_data - HFI capabilities per CPU
- * @perf_cap: Performance capability
- @@ -174,6 +185,27 @@ static struct workqueue_struct *hfi_updates_wq;
- #ifdef CONFIG_IPC_CLASSES
- static int __percpu *hfi_ipcc_scores;
-
- +void intel_hfi_update_ipcc(struct task_struct *curr)
- +{
- + union hfi_thread_feedback_char_msr msr;
- +
- + /* We should not be here if ITD is not supported. */
- + if (!cpu_feature_enabled(X86_FEATURE_ITD)) {
- + pr_warn_once("task classification requested but not supported!");
- + return;
- + }
- +
- + rdmsrl(MSR_IA32_HW_FEEDBACK_CHAR, msr.full);
- + if (!msr.split.valid)
- + return;
- +
- + /*
- + * 0 is a valid classification for Intel Thread Director. A scheduler
- + * IPCC class of 0 means that the task is unclassified. Adjust.
- + */
- + curr->ipcc = msr.split.classid + 1;
- +}
- +
- static int alloc_hfi_ipcc_scores(void)
- {
- if (!cpu_feature_enabled(X86_FEATURE_ITD))
- --
- 2.39.2
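- A toy decode of the feedback MSR layout above: classid in bits 7:0,
- valid in bit 63. The raw value is made up, and the +1 mirrors the
- unclassified-means-zero adjustment in intel_hfi_update_ipcc():
- 
- #include <stdint.h>
- #include <stdio.h>
- 
- union hfi_thread_feedback_char_msr {
-         struct {
-                 uint64_t classid : 8;
-                 uint64_t __reserved : 55;
-                 uint64_t valid : 1;
-         } split;
-         uint64_t full;
- };
- 
- int main(void)
- {
-         /* Pretend rdmsrl(MSR_IA32_HW_FEEDBACK_CHAR, ...) returned this. */
-         union hfi_thread_feedback_char_msr msr = { .full = (1ULL << 63) | 2 };
- 
-         if (msr.split.valid)
-                 /* HFI class 2 becomes scheduler IPCC class 3. */
-                 printf("ipcc = %u\n", (unsigned int)msr.split.classid + 1);
-         return 0;
- }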
- From 4cd93c9b598e57aa752639a4d93240d54ca89f23 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:56 -0800
- Subject: [PATCH] thermal: intel: hfi: Report the IPC class score of a CPU
- Implement the arch_get_ipcc_score() interface of the scheduler. Use the
- performance capabilities of the extended Hardware Feedback Interface table
- as the IPC score.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- arch/x86/include/asm/topology.h | 2 ++
- drivers/thermal/intel/intel_hfi.c | 27 +++++++++++++++++++++++++++
- 2 files changed, 29 insertions(+)
- diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
- index ffcdac3f398f0..c4fcd9c3c634f 100644
- --- a/arch/x86/include/asm/topology.h
- +++ b/arch/x86/include/asm/topology.h
- @@ -229,8 +229,10 @@ void init_freq_invariance_cppc(void);
-
- #if defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL)
- void intel_hfi_update_ipcc(struct task_struct *curr);
- +unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu);
-
- #define arch_update_ipcc intel_hfi_update_ipcc
- +#define arch_get_ipcc_score intel_hfi_get_ipcc_score
- #endif /* defined(CONFIG_IPC_CLASSES) && defined(CONFIG_INTEL_HFI_THERMAL) */
-
- #endif /* _ASM_X86_TOPOLOGY_H */
- diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
- index 530dcf57e06e2..fa9b4a678d926 100644
- --- a/drivers/thermal/intel/intel_hfi.c
- +++ b/drivers/thermal/intel/intel_hfi.c
- @@ -206,6 +206,33 @@ void intel_hfi_update_ipcc(struct task_struct *curr)
- curr->ipcc = msr.split.classid + 1;
- }
-
- +unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
- +{
- + unsigned short hfi_class;
- + int *scores;
- +
- + if (cpu < 0 || cpu >= nr_cpu_ids)
- + return -EINVAL;
- +
- + if (ipcc == IPC_CLASS_UNCLASSIFIED)
- + return -EINVAL;
- +
- + /*
- + * Scheduler IPC classes start at 1. HFI classes start at 0.
- + * See note intel_hfi_update_ipcc().
- + */
- + hfi_class = ipcc - 1;
- +
- + if (hfi_class >= hfi_features.nr_classes)
- + return -EINVAL;
- +
- + scores = per_cpu_ptr(hfi_ipcc_scores, cpu);
- + if (!scores)
- + return -ENODEV;
- +
- + return READ_ONCE(scores[hfi_class]);
- +}
- +
- static int alloc_hfi_ipcc_scores(void)
- {
- if (!cpu_feature_enabled(X86_FEATURE_ITD))
- --
- 2.39.2
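- Note that intel_hfi_get_ipcc_score() returns negative errnos through an
- unsigned long; callers in the fair scheduler test the result with
- IS_ERR_VALUE(), which treats the topmost 4095 values of the range as
- error codes. A minimal model of that check:
- 
- #include <stdio.h>
- 
- #define MAX_ERRNO       4095
- #define IS_ERR_VALUE(x) ((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)
- #define EINVAL          22
- 
- int main(void)
- {
-         unsigned long bad = (unsigned long)-EINVAL;     /* wraps to the top */
-         unsigned long good = 700;                       /* a plain score */
- 
-         printf("bad:  %s\n", IS_ERR_VALUE(bad) ? "error" : "score");
-         printf("good: %s\n", IS_ERR_VALUE(good) ? "error" : "score");
-         return 0;
- }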
- From 6452cc53bb25d5f4716f2e59ae3900452315b9be Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:57 -0800
- Subject: [PATCH] thermal: intel: hfi: Define a default class for unclassified
- tasks
- A task may be unclassified if it has been recently created, spends most
- of its lifetime sleeping, or hardware has not provided a classification.
- Most tasks will eventually be classified as the scheduler's IPC class 1
- (HFI class 0). This class corresponds to the capabilities in the legacy,
- classless, HFI table.
- IPC class 1 is a reasonable choice until hardware provides an actual
- classification. Meanwhile, the scheduler will place classes of tasks with
- higher IPC scores on higher-performance CPUs.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- drivers/thermal/intel/intel_hfi.c | 15 ++++++++++++++-
- 1 file changed, 14 insertions(+), 1 deletion(-)
- diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
- index fa9b4a678d926..7ea6acce7107e 100644
- --- a/drivers/thermal/intel/intel_hfi.c
- +++ b/drivers/thermal/intel/intel_hfi.c
- @@ -185,6 +185,19 @@ static struct workqueue_struct *hfi_updates_wq;
- #ifdef CONFIG_IPC_CLASSES
- static int __percpu *hfi_ipcc_scores;
-
- +/*
- + * A task may be unclassified if it has been recently created, spend most of
- + * its lifetime sleeping, or hardware has not provided a classification.
- + *
- + * Most tasks will be classified as scheduler's IPC class 1 (HFI class 0)
- + * eventually. Meanwhile, the scheduler will place classes of tasks with higher
- + * IPC scores on higher-performance CPUs.
- + *
- + * IPC class 1 is a reasonable choice. It matches the performance capability
- + * of the legacy, classless, HFI table.
- + */
- +#define HFI_UNCLASSIFIED_DEFAULT 1
- +
- void intel_hfi_update_ipcc(struct task_struct *curr)
- {
- union hfi_thread_feedback_char_msr msr;
- @@ -215,7 +228,7 @@ unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
- return -EINVAL;
-
- if (ipcc == IPC_CLASS_UNCLASSIFIED)
- - return -EINVAL;
- + ipcc = HFI_UNCLASSIFIED_DEFAULT;
-
- /*
- * Scheduler IPC classes start at 1. HFI classes start at 0.
- --
- 2.39.2
- From 44126224fe2556862b2324fbff03fd627e195080 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:58 -0800
- Subject: [PATCH] thermal: intel: hfi: Enable the Intel Thread Director
- Enable Intel Thread Director from the CPU hotplug callback: enable it
- globally from CPU0, then enable the thread-classification hardware in
- each logical processor individually.
- Also, initialize the number of classes supported.
- Let the scheduler know that it can start using IPC classes.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- arch/x86/include/asm/msr-index.h | 2 ++
- drivers/thermal/intel/intel_hfi.c | 40 +++++++++++++++++++++++++++++--
- 2 files changed, 40 insertions(+), 2 deletions(-)
- diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
- index 978a3e203cdbb..35ca36a7f8896 100644
- --- a/arch/x86/include/asm/msr-index.h
- +++ b/arch/x86/include/asm/msr-index.h
- @@ -1099,6 +1099,8 @@
- /* Hardware Feedback Interface */
- #define MSR_IA32_HW_FEEDBACK_PTR 0x17d0
- #define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1
- +#define MSR_IA32_HW_FEEDBACK_THREAD_CONFIG 0x17d4
- +#define MSR_IA32_HW_FEEDBACK_CHAR 0x17d2
-
- /* x2APIC locked status */
- #define MSR_IA32_XAPIC_DISABLE_STATUS 0xBD
- diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
- index 7ea6acce7107e..35d947f475508 100644
- --- a/drivers/thermal/intel/intel_hfi.c
- +++ b/drivers/thermal/intel/intel_hfi.c
- @@ -48,6 +48,8 @@
- /* Hardware Feedback Interface MSR configuration bits */
- #define HW_FEEDBACK_PTR_VALID_BIT BIT(0)
- #define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0)
- +#define HW_FEEDBACK_CONFIG_ITD_ENABLE_BIT BIT(1)
- +#define HW_FEEDBACK_THREAD_CONFIG_ENABLE_BIT BIT(0)
-
- /* CPUID detection and enumeration definitions for HFI */
-
- @@ -72,6 +74,15 @@ union cpuid6_edx {
- u32 full;
- };
-
- +union cpuid6_ecx {
- + struct {
- + u32 dont_care0:8;
- + u32 nr_classes:8;
- + u32 dont_care1:16;
- + } split;
- + u32 full;
- +};
- +
- #ifdef CONFIG_IPC_CLASSES
- union hfi_thread_feedback_char_msr {
- struct {
- @@ -506,6 +517,11 @@ void intel_hfi_online(unsigned int cpu)
-
- init_hfi_cpu_index(info);
-
- + if (cpu_feature_enabled(X86_FEATURE_ITD)) {
- + msr_val = HW_FEEDBACK_THREAD_CONFIG_ENABLE_BIT;
- + wrmsrl(MSR_IA32_HW_FEEDBACK_THREAD_CONFIG, msr_val);
- + }
- +
- /*
- * Now check if the HFI instance of the package/die of @cpu has been
- * initialized (by checking its header). In such case, all we have to
- @@ -561,8 +577,22 @@ void intel_hfi_online(unsigned int cpu)
- */
- rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
- msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
- +
- + if (cpu_feature_enabled(X86_FEATURE_ITD))
- + msr_val |= HW_FEEDBACK_CONFIG_ITD_ENABLE_BIT;
- +
- wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
-
- + /*
- + * We have all we need to support IPC classes. Task classification is
- + * now working.
- + *
- + * All class scores are zero until after the first HFI update. That is
- + * OK. The scheduler queries these scores at every load balance.
- + */
- + if (cpu_feature_enabled(X86_FEATURE_ITD))
- + sched_enable_ipc_classes();
- +
- unlock:
- mutex_unlock(&hfi_instance_lock);
- return;
- @@ -640,8 +670,14 @@ static __init int hfi_parse_features(void)
- */
- hfi_features.class_stride = nr_capabilities;
-
- - /* For now, use only one class of the HFI table */
- - hfi_features.nr_classes = 1;
- + if (cpu_feature_enabled(X86_FEATURE_ITD)) {
- + union cpuid6_ecx ecx;
- +
- + ecx.full = cpuid_ecx(CPUID_HFI_LEAF);
- + hfi_features.nr_classes = ecx.split.nr_classes;
- + } else {
- + hfi_features.nr_classes = 1;
- + }
-
- /*
- * The header contains change indications for each supported feature.
- --
- 2.39.2
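- A toy decode of CPUID(6).ECX using the union above; the number of ITD
- classes lives in bits 15:8, and the raw register value here is made up:
- 
- #include <stdint.h>
- #include <stdio.h>
- 
- union cpuid6_ecx {
-         struct {
-                 uint32_t dont_care0 : 8;
-                 uint32_t nr_classes : 8;
-                 uint32_t dont_care1 : 16;
-         } split;
-         uint32_t full;
- };
- 
- int main(void)
- {
-         union cpuid6_ecx ecx = { .full = 4u << 8 }; /* pretend CPUID output */
- 
-         printf("nr_classes = %u\n", ecx.split.nr_classes);      /* 4 */
-         return 0;
- }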
- From 734cc5407daf6d98ff6c89f79bf1f794635f7617 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:10:59 -0800
- Subject: [PATCH] sched/task_struct: Add helpers for IPC classification
- The unprocessed classification that hardware provides for a task may not
- be usable by the scheduler: the classification may change too frequently or
- architectures may want to consider extra factors. For instance, some
- processors with Intel Thread Director need to consider the state of the SMT
- siblings of a core.
- Provide per-task helper variables that architectures can use to
- post-process the classification that hardware provides.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- include/linux/sched.h | 12 +++++++++++-
- 1 file changed, 11 insertions(+), 1 deletion(-)
- diff --git a/include/linux/sched.h b/include/linux/sched.h
- index 4f96c3dd59d0b..582e14cf3f765 100644
- --- a/include/linux/sched.h
- +++ b/include/linux/sched.h
- @@ -1529,7 +1529,17 @@ struct task_struct {
- * A hardware-defined classification of task that reflects but is
- * not identical to the number of instructions per cycle.
- */
- - unsigned short ipcc;
- + unsigned int ipcc : 9;
- + /*
- + * A candidate classification that arch-specific implementations
- + * qualify for correctness.
- + */
- + unsigned int ipcc_tmp : 9;
- + /*
- + * Counter to filter out transient candidate classifications
- + * of a task.
- + */
- + unsigned int ipcc_cntr : 14;
- #endif
-
- /*
- --
- 2.39.2
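- The three bitfields above pack into one 32-bit word (9 + 9 + 14 = 32):
- nine bits cover IPC classes 0..511 (0 being IPC_CLASS_UNCLASSIFIED) and
- fourteen counter bits debounce up to 16383 consecutive observations. A
- quick check of those bounds:
- 
- #include <stdio.h>
- 
- int main(void)
- {
-         printf("total bits: %d\n", 9 + 9 + 14);         /* 32 */
-         printf("max class:  %d\n", (1 << 9) - 1);       /* 511 */
-         printf("max cntr:   %d\n", (1 << 14) - 1);      /* 16383 */
-         return 0;
- }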
- From 41d3fb0009d226f33935191790774bec3460c3e1 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:11:00 -0800
- Subject: [PATCH] sched/core: Initialize helpers of task classification
- Just as tasks start life unclassified, initialize the classification
- auxiliary variables.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- kernel/sched/core.c | 2 ++
- 1 file changed, 2 insertions(+)
- diff --git a/kernel/sched/core.c b/kernel/sched/core.c
- index 0ab39cc055c77..2a942fc3c3094 100644
- --- a/kernel/sched/core.c
- +++ b/kernel/sched/core.c
- @@ -4426,6 +4426,8 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
- p->se.vruntime = 0;
- #ifdef CONFIG_IPC_CLASSES
- p->ipcc = IPC_CLASS_UNCLASSIFIED;
- + p->ipcc_tmp = IPC_CLASS_UNCLASSIFIED;
- + p->ipcc_cntr = 0;
- #endif
- INIT_LIST_HEAD(&p->se.group_node);
-
- --
- 2.39.2
- From 4e8dc94941042de9905f32f1d8e1a49e8893d631 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:11:01 -0800
- Subject: [PATCH] sched/fair: Introduce sched_smt_siblings_idle()
- X86 needs to know the idle state of the SMT siblings of a CPU to improve
- the accuracy of IPCC classification. X86 implements support for IPC classes
- in the thermal HFI driver.
- Rename is_core_idle() as sched_smt_siblings_idle() and make it available
- outside the scheduler code.
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- include/linux/sched.h | 2 ++
- kernel/sched/fair.c | 21 +++++++++++++++------
- 2 files changed, 17 insertions(+), 6 deletions(-)
- diff --git a/include/linux/sched.h b/include/linux/sched.h
- index 582e14cf3f765..f2adf662eda83 100644
- --- a/include/linux/sched.h
- +++ b/include/linux/sched.h
- @@ -2440,4 +2440,6 @@ static inline void sched_core_fork(struct task_struct *p) { }
-
- extern void sched_set_stop_task(int cpu, struct task_struct *stop);
-
- +extern bool sched_smt_siblings_idle(int cpu);
- +
- #endif
- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
- index a9a105092e7c3..97c574d5fa575 100644
- --- a/kernel/sched/fair.c
- +++ b/kernel/sched/fair.c
- @@ -1064,7 +1064,14 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
- * Scheduling class queueing methods:
- */
-
- -static inline bool is_core_idle(int cpu)
- +/**
- + * sched_smt_siblings_idle - Check whether SMT siblings of a CPU are idle
- + * @cpu: The CPU to check
- + *
- + * Returns true if all the SMT siblings of @cpu are idle or @cpu does not have
- + * SMT siblings. The idle state of @cpu is not considered.
- + */
- +bool sched_smt_siblings_idle(int cpu)
- {
- #ifdef CONFIG_SCHED_SMT
- int sibling;
- @@ -1767,7 +1774,7 @@ static inline int numa_idle_core(int idle_core, int cpu)
- * Prefer cores instead of packing HT siblings
- * and triggering future load balancing.
- */
- - if (is_core_idle(cpu))
- + if (sched_smt_siblings_idle(cpu))
- idle_core = cpu;
-
- return idle_core;
- @@ -9388,7 +9395,8 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs
- * If the destination CPU has SMT siblings, env->idle != CPU_NOT_IDLE
- * is not sufficient. We need to make sure the whole core is idle.
- */
- - if (sds->local->flags & SD_SHARE_CPUCAPACITY && !is_core_idle(env->dst_cpu))
- + if (sds->local->flags & SD_SHARE_CPUCAPACITY &&
- + !sched_smt_siblings_idle(env->dst_cpu))
- return false;
-
- /* Only do SMT checks if either local or candidate have SMT siblings. */
- @@ -10557,7 +10565,8 @@ static struct rq *find_busiest_queue(struct lb_env *env,
- sched_asym_prefer(i, env->dst_cpu) &&
- nr_running == 1) {
- if (env->sd->flags & SD_SHARE_CPUCAPACITY ||
- - (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && is_core_idle(i)))
- + (!(env->sd->flags & SD_SHARE_CPUCAPACITY) &&
- + sched_smt_siblings_idle(i)))
- continue;
- }
-
- @@ -10686,7 +10695,7 @@ asym_active_balance(struct lb_env *env)
- * busy sibling.
- */
- return sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
- - !is_core_idle(env->src_cpu);
- + !sched_smt_siblings_idle(env->src_cpu);
- }
-
- return false;
- @@ -11433,7 +11442,7 @@ static void nohz_balancer_kick(struct rq *rq)
- */
- if (sd->flags & SD_SHARE_CPUCAPACITY ||
- (!(sd->flags & SD_SHARE_CPUCAPACITY) &&
- - is_core_idle(i))) {
- + sched_smt_siblings_idle(i))) {
- flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
- goto unlock;
- }
- --
- 2.39.2
- From 0552b24fd1c1d40cd5b4a32d07afae3f3136d6c2 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:11:02 -0800
- Subject: [PATCH] thermal: intel: hfi: Implement model-specific checks for task
- classification
- In Alder Lake and Raptor Lake, the result of thread classification is more
- accurate when only one of the SMT siblings of a core is busy. Classification
- results for classes 2 and 3 are always reliable.
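- The patch implements the model check below as a switch on
- boot_cpu_data.x86_model; an equivalent sketch with a match table
- (hfi_hybrid_ids is a hypothetical name) could look like:
- #include <asm/cpu_device_id.h>
- 
- /* Hypothetical table: the hybrid models named in this patch. */
- static const struct x86_cpu_id hfi_hybrid_ids[] = {
- 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL),
- 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL),
- 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL),
- 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL),
- 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, NULL),
- 	{}
- };
- 
- /* Classification would then be trusted unconditionally whenever
-  * x86_match_cpu(hfi_hybrid_ids) returns NULL. */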
- To avoid unnecessary migrations, only update the class of a task if it has
- been the same for 4 consecutive user ticks.
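- As a worked example of this policy: observations 1,1,1,1 commit class 1 on
- the fourth tick, while 1,1,2 restarts the count. A standalone C sketch
- (field names mirror the patch's ipcc, ipcc_tmp and ipcc_cntr; this is not
- the kernel code itself):
- struct ipcc_state {
- 	unsigned char  ipcc;	/* committed class */
- 	unsigned char  tmp;	/* most recently observed class */
- 	unsigned short cntr;	/* consecutive observations of tmp */
- };
- 
- static void debounce_sketch(struct ipcc_state *s, unsigned char new_class)
- {
- 	if (s->tmp != new_class)
- 		s->cntr = 1;		/* class changed: restart the count */
- 	else if (s->cntr + 1 < 4)
- 		s->cntr++;		/* same class: keep counting */
- 	else
- 		s->ipcc = new_class;	/* stable for 4 ticks: commit */
- 
- 	s->tmp = new_class;
- }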
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- drivers/thermal/intel/intel_hfi.c | 60 ++++++++++++++++++++++++++++++-
- 1 file changed, 59 insertions(+), 1 deletion(-)
- diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
- index 35d947f475508..fdb53e4cabc14 100644
- --- a/drivers/thermal/intel/intel_hfi.c
- +++ b/drivers/thermal/intel/intel_hfi.c
- @@ -40,6 +40,7 @@
- #include <linux/workqueue.h>
-
- #include <asm/msr.h>
- +#include <asm/intel-family.h>
-
- #include "../thermal_core.h"
- #include "intel_hfi.h"
- @@ -209,9 +210,64 @@ static int __percpu *hfi_ipcc_scores;
- */
- #define HFI_UNCLASSIFIED_DEFAULT 1
-
- +#define CLASS_DEBOUNCER_SKIPS 4
- +
- +/**
- + * debounce_and_update_class() - Process and update a task's classification
- + *
- + * @p: The task whose classification will be updated
- + * @new_ipcc: The new IPC classification
- + *
- + * Update the classification of @p with the new value that hardware provides.
- + * Only update the classification of @p if it has been the same for
- + * CLASS_DEBOUNCER_SKIPS consecutive ticks.
- + */
- +static void debounce_and_update_class(struct task_struct *p, u8 new_ipcc)
- +{
- + u16 debounce_skip;
- +
- + /* The class of @p changed. Only restart the debounce counter. */
- + if (p->ipcc_tmp != new_ipcc) {
- + p->ipcc_cntr = 1;
- + goto out;
- + }
- +
- + /*
- + * The class of @p did not change. Update it if it has been the same
- + * for CLASS_DEBOUNCER_SKIPS user ticks.
- + */
- + debounce_skip = p->ipcc_cntr + 1;
- + if (debounce_skip < CLASS_DEBOUNCER_SKIPS)
- + p->ipcc_cntr++;
- + else
- + p->ipcc = new_ipcc;
- +
- +out:
- + p->ipcc_tmp = new_ipcc;
- +}
- +
- +static bool classification_is_accurate(u8 hfi_class, bool smt_siblings_idle)
- +{
- + switch (boot_cpu_data.x86_model) {
- + case INTEL_FAM6_ALDERLAKE:
- + case INTEL_FAM6_ALDERLAKE_L:
- + case INTEL_FAM6_RAPTORLAKE:
- + case INTEL_FAM6_RAPTORLAKE_P:
- + case INTEL_FAM6_RAPTORLAKE_S:
- + if (hfi_class == 3 || hfi_class == 2 || smt_siblings_idle)
- + return true;
- +
- + return false;
- +
- + default:
- + return true;
- + }
- +}
- +
- void intel_hfi_update_ipcc(struct task_struct *curr)
- {
- union hfi_thread_feedback_char_msr msr;
- + bool idle;
-
- /* We should not be here if ITD is not supported. */
- if (!cpu_feature_enabled(X86_FEATURE_ITD)) {
- @@ -227,7 +283,9 @@ void intel_hfi_update_ipcc(struct task_struct *curr)
- * 0 is a valid classification for Intel Thread Director. A scheduler
- * IPCC class of 0 means that the task is unclassified. Adjust.
- */
- - curr->ipcc = msr.split.classid + 1;
- + idle = sched_smt_siblings_idle(task_cpu(curr));
- + if (classification_is_accurate(msr.split.classid, idle))
- + debounce_and_update_class(curr, msr.split.classid + 1);
- }
-
- unsigned long intel_hfi_get_ipcc_score(unsigned short ipcc, int cpu)
- --
- 2.39.2
- From ea77b647f82ae1b9b57f60841b2aad7cb89bbc92 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:11:03 -0800
- Subject: [PATCH] x86/cpufeatures: Add feature bit for HRESET
- The HRESET instruction prevents the classification of the current task
- from influencing the classification of the next task when the two run
- serially on the same logical processor. Add the feature bit that
- enumerates HRESET and the MSR that controls which history components
- the instruction resets.
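- For reference, the enumeration this patch wires into scattered.c can be
- probed from userspace; a hedged sketch using the compiler-provided cpuid.h
- (bit 22 of CPUID.(EAX=7,ECX=1):EAX, matching the table entry below):
- #include <cpuid.h>
- #include <stdio.h>
- 
- int main(void)
- {
- 	unsigned int eax, ebx, ecx, edx;
- 
- 	/* CPUID.(EAX=7,ECX=1):EAX[22] enumerates HRESET. */
- 	if (__get_cpuid_count(7, 1, &eax, &ebx, &ecx, &edx) &&
- 	    (eax & (1u << 22)))
- 		printf("HRESET enumerated\n");
- 	return 0;
- }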
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- arch/x86/include/asm/cpufeatures.h | 1 +
- arch/x86/include/asm/msr-index.h | 4 +++-
- arch/x86/kernel/cpu/scattered.c | 1 +
- 3 files changed, 5 insertions(+), 1 deletion(-)
- diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
- index a2f2730737aeb..0a64e6bc67b13 100644
- --- a/arch/x86/include/asm/cpufeatures.h
- +++ b/arch/x86/include/asm/cpufeatures.h
- @@ -307,6 +307,7 @@
- #define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */
- #define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */
- #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
- +#define X86_FEATURE_HRESET (11*32+23) /* Hardware history reset instruction */
-
- /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
- #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
- diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
- index 35ca36a7f8896..4e6b1eddd7339 100644
- --- a/arch/x86/include/asm/msr-index.h
- +++ b/arch/x86/include/asm/msr-index.h
- @@ -1102,6 +1102,9 @@
- #define MSR_IA32_HW_FEEDBACK_THREAD_CONFIG 0x17d4
- #define MSR_IA32_HW_FEEDBACK_CHAR 0x17d2
-
- +/* Hardware History Reset */
- +#define MSR_IA32_HW_HRESET_ENABLE 0x17da
- +
- /* x2APIC locked status */
- #define MSR_IA32_XAPIC_DISABLE_STATUS 0xBD
- #define LEGACY_XAPIC_DISABLED BIT(0) /*
- @@ -1109,5 +1112,4 @@
- * disabling x2APIC will cause
- * a #GP
- */
- -
- #endif /* _ASM_X86_MSR_INDEX_H */
- diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
- index f53944fb8f7f9..66bc5713644dc 100644
- --- a/arch/x86/kernel/cpu/scattered.c
- +++ b/arch/x86/kernel/cpu/scattered.c
- @@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
- { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
- { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
- { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
- + { X86_FEATURE_HRESET, CPUID_EAX, 22, 0x00000007, 1 },
- { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
- { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
- { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
- --
- 2.39.2
- From 98f46411379b4192bc6070a38628c32e880854a8 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:11:04 -0800
- Subject: [PATCH] x86/hreset: Configure history reset
- Configure the MSR that controls the behavior of HRESET on each logical
- processor.
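- Condensed intent of the setup, as a sketch (assuming, per the patch, that
- CPUID(0x20).EBX enumerates the resettable history components): the boot CPU
- caches the mask once so that all CPUs arm the same components and migrating
- tasks see uniform behavior.
- static u32 history_mask;	/* filled in once on the boot CPU */
- 
- static void arm_hreset_sketch(bool boot_cpu)
- {
- 	if (!cpu_feature_enabled(X86_FEATURE_HRESET))
- 		return;
- 
- 	if (boot_cpu)
- 		history_mask = cpuid_ebx(0x20);
- 
- 	if (history_mask)
- 		wrmsrl(MSR_IA32_HW_HRESET_ENABLE, history_mask);
- }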
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- arch/x86/kernel/cpu/common.c | 23 ++++++++++++++++++++++-
- 1 file changed, 22 insertions(+), 1 deletion(-)
- diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
- index 6a25e93f2a87c..ae250426af286 100644
- --- a/arch/x86/kernel/cpu/common.c
- +++ b/arch/x86/kernel/cpu/common.c
- @@ -412,6 +412,26 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
- cr4_clear_bits(X86_CR4_UMIP);
- }
-
- +static u32 hardware_history_features __ro_after_init;
- +
- +static __always_inline void setup_hreset(struct cpuinfo_x86 *c)
- +{
- + if (!cpu_feature_enabled(X86_FEATURE_HRESET))
- + return;
- +
- + /*
- + * On all CPUs, use the hardware history features that the boot
- + * CPU supports.
- + */
- + if (c == &boot_cpu_data)
- + hardware_history_features = cpuid_ebx(0x20);
- +
- + if (!hardware_history_features)
- + return;
- +
- + wrmsrl(MSR_IA32_HW_HRESET_ENABLE, hardware_history_features);
- +}
- +
- /* These bits should not change their value after CPU init is finished. */
- static const unsigned long cr4_pinned_mask =
- X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
- @@ -1849,10 +1869,11 @@ static void identify_cpu(struct cpuinfo_x86 *c)
- /* Disable the PN if appropriate */
- squash_the_stupid_serial_number(c);
-
- - /* Set up SMEP/SMAP/UMIP */
- + /* Set up SMEP/SMAP/UMIP/HRESET */
- setup_smep(c);
- setup_smap(c);
- setup_umip(c);
- + setup_hreset(c);
-
- /* Enable FSGSBASE instructions if available. */
- if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
- --
- 2.39.2
- From 296067cf1027b437407e587a6cb2a0a7bdf6c503 Mon Sep 17 00:00:00 2001
- From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Date: Mon, 6 Feb 2023 21:11:05 -0800
- Subject: [PATCH] x86/process: Reset hardware history in context switch
- Reset the classification history of the current task when switching to the
- next task. Hardware will start the classification of the next task from
- scratch.
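- The patch emits the HRESET bytes through ALTERNATIVE(), so CPUs without the
- feature execute only NOPs in the context-switch path. A rough functional
- equivalent, sketched with the kernel's static_cpu_has():
- static inline void reset_hardware_history_sketch(u32 features)
- {
- 	/* Runtime-patched check; the real patch emits NOPs instead. */
- 	if (static_cpu_has(X86_FEATURE_HRESET))
- 		asm volatile(".byte 0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00"
- 			     : : "a" (features) : "memory");
- }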
- Cc: Ben Segall <bsegall@google.com>
- Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
- Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
- Cc: Ionela Voinescu <ionela.voinescu@arm.com>
- Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
- Cc: Len Brown <len.brown@intel.com>
- Cc: Lukasz Luba <lukasz.luba@arm.com>
- Cc: Mel Gorman <mgorman@suse.de>
- Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- Cc: Steven Rostedt <rostedt@goodmis.org>
- Cc: Tim C. Chen <tim.c.chen@intel.com>
- Cc: Valentin Schneider <vschneid@redhat.com>
- Cc: x86@kernel.org
- Cc: linux-pm@vger.kernel.org
- Cc: linux-kernel@vger.kernel.org
- Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
- Patchset: intel-thread-director
- ---
- arch/x86/include/asm/hreset.h | 31 +++++++++++++++++++++++++++++++
- arch/x86/kernel/cpu/common.c | 7 +++++++
- arch/x86/kernel/process_32.c | 3 +++
- arch/x86/kernel/process_64.c | 3 +++
- 4 files changed, 44 insertions(+)
- create mode 100644 arch/x86/include/asm/hreset.h
- diff --git a/arch/x86/include/asm/hreset.h b/arch/x86/include/asm/hreset.h
- new file mode 100644
- index 0000000000000..d68ca2fb8642b
- --- /dev/null
- +++ b/arch/x86/include/asm/hreset.h
- @@ -0,0 +1,31 @@
- +/* SPDX-License-Identifier: GPL-2.0 */
- +#ifndef _ASM_X86_HRESET_H
- +#define _ASM_X86_HRESET_H
- +
- +/**
- + * HRESET - History reset. Available since binutils v2.36.
- + *
- + * Request the processor to reset the history of task classification on the
- + * current logical processor. The history components to be
- + * reset are specified in %eax. Only bits specified in CPUID(0x20).EBX
- + * and enabled in the IA32_HRESET_ENABLE MSR can be selected.
- + *
- + * The assembly code looks like:
- + *
- + * hreset %eax
- + *
- + * The corresponding machine code looks like:
- + *
- + * F3 0F 3A F0 ModRM Imm
- + *
- + * The value of ModRM is 0xc0 to specify %eax register addressing.
- + * The ignored immediate operand is set to 0.
- + *
- + * The instruction is documented in the Intel SDM.
- + */
- +
- +#define __ASM_HRESET ".byte 0xf3, 0xf, 0x3a, 0xf0, 0xc0, 0x0"
- +
- +void reset_hardware_history(void);
- +
- +#endif /* _ASM_X86_HRESET_H */
- diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
- index ae250426af286..c5c835c2a6195 100644
- --- a/arch/x86/kernel/cpu/common.c
- +++ b/arch/x86/kernel/cpu/common.c
- @@ -53,6 +53,7 @@
- #include <asm/mce.h>
- #include <asm/msr.h>
- #include <asm/cacheinfo.h>
- +#include <asm/hreset.h>
- #include <asm/memtype.h>
- #include <asm/microcode.h>
- #include <asm/microcode_intel.h>
- @@ -414,6 +415,12 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c)
-
- static u32 hardware_history_features __ro_after_init;
-
- +void reset_hardware_history(void)
- +{
- + asm_inline volatile (ALTERNATIVE("", __ASM_HRESET, X86_FEATURE_HRESET)
- + : : "a" (hardware_history_features) : "memory");
- +}
- +
- static __always_inline void setup_hreset(struct cpuinfo_x86 *c)
- {
- if (!cpu_feature_enabled(X86_FEATURE_HRESET))
- diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
- index 708c87b88cc15..7353bb119e79c 100644
- --- a/arch/x86/kernel/process_32.c
- +++ b/arch/x86/kernel/process_32.c
- @@ -52,6 +52,7 @@
- #include <asm/switch_to.h>
- #include <asm/vm86.h>
- #include <asm/resctrl.h>
- +#include <asm/hreset.h>
- #include <asm/proto.h>
-
- #include "process.h"
- @@ -214,6 +215,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
- /* Load the Intel cache allocation PQR MSR. */
- resctrl_sched_in(next_p);
-
- + reset_hardware_history();
- +
- return prev_p;
- }
-
- diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
- index bb65a68b4b499..eb204809890d2 100644
- --- a/arch/x86/kernel/process_64.c
- +++ b/arch/x86/kernel/process_64.c
- @@ -53,6 +53,7 @@
- #include <asm/xen/hypervisor.h>
- #include <asm/vdso.h>
- #include <asm/resctrl.h>
- +#include <asm/hreset.h>
- #include <asm/unistd.h>
- #include <asm/fsgsbase.h>
- #ifdef CONFIG_IA32_EMULATION
- @@ -658,6 +659,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
- /* Load the Intel cache allocation PQR MSR. */
- resctrl_sched_in(next_p);
-
- + reset_hardware_history();
- +
- return prev_p;
- }
-
- --
- 2.39.2
|