{"id":"https://openalex.org/W7161117370","doi":"https://doi.org/10.1145/3746467.3801520","title":"Optimizing Deep Learning Inference on Multi-Core Laptop CPUs with Scheduling and Thread Affinity Strategies","display_name":"Optimizing Deep Learning Inference on Multi-Core Laptop CPUs with Scheduling and Thread Affinity Strategies","publication_year":2026,"publication_date":"2026-04-23","ids":{"openalex":"https://openalex.org/W7161117370","doi":"https://doi.org/10.1145/3746467.3801520"},"language":null,"primary_location":{"id":"doi:10.1145/3746467.3801520","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746467.3801520","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 ACM Southeast Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3746467.3801520","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039818893","display_name":"Mohammod Akib Khan","orcid":null},"institutions":[{"id":"https://openalex.org/I172980758","display_name":"Kennesaw State University","ror":"https://ror.org/00jeqjx33","country_code":"US","type":"education","lineage":["https://openalex.org/I172980758"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mohammod Akib Khan","raw_affiliation_strings":["Computer Science, Kennesaw State University, MARIETTA, GA, USA"],"raw_orcid":"https://orcid.org/0000-0002-1517-5319","affiliations":[{"raw_affiliation_string":"Computer Science, Kennesaw State University, MARIETTA, GA, USA","institution_ids":["https://openalex.org/I172980758"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009170496","display_name":"Sahidul Islam","orcid":"https://orcid.org/0000-0002-4488-8182"},"institutions":[{"id":"https://openalex.org/I172980758","display_name":"Kennesaw State University","ror":"https://ror.org/00jeqjx33","country_code":"US","type":"education","lineage":["https://openalex.org/I172980758"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sahidul Islam","raw_affiliation_strings":["Computer Science, Kennesaw State University, MARIETTA, GA, USA"],"raw_orcid":"https://orcid.org/0000-0002-4488-8182","affiliations":[{"raw_affiliation_string":"Computer Science, Kennesaw State University, MARIETTA, GA, USA","institution_ids":["https://openalex.org/I172980758"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5039818893"],"corresponding_institution_ids":["https://openalex.org/I172980758"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.96505824,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"276","last_page":"280"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6704999804496765,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6704999804496765,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.07519999891519547,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.054999999701976776,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.6015999913215637},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5958999991416931},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5644000172615051},{"id":"https://openalex.org/keywords/laptop","display_name":"Laptop","score":0.49549999833106995},{"id":"https://openalex.org/keywords/context-switch","display_name":"Context switch","score":0.47440001368522644},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4634999930858612},{"id":"https://openalex.org/keywords/inference-engine","display_name":"Inference engine","score":0.4578999876976013},{"id":"https://openalex.org/keywords/two-level-scheduling","display_name":"Two-level scheduling","score":0.3831999897956848}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7821000218391418},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.6015999913215637},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5958999991416931},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5644000172615051},{"id":"https://openalex.org/C2780008327","wikidata":"https://www.wikidata.org/wiki/Q3962","display_name":"Laptop","level":2,"score":0.49549999833106995},{"id":"https://openalex.org/C53833338","wikidata":"https://www.wikidata.org/wiki/Q1061424","display_name":"Context switch","level":2,"score":0.47440001368522644},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4634999930858612},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.4578999876976013},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4016000032424927},{"id":"https://openalex.org/C119948110","wikidata":"https://www.wikidata.org/wiki/Q7858726","display_name":"Two-level scheduling","level":4,"score":0.3831999897956848},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37770000100135803},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.37310001254081726},{"id":"https://openalex.org/C31689143","wikidata":"https://www.wikidata.org/wiki/Q733809","display_name":"Fair-share scheduling","level":3,"score":0.37059998512268066},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.336899995803833},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3215999901294708},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3156000077724457},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.31529998779296875},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.2904999852180481},{"id":"https://openalex.org/C122141398","wikidata":"https://www.wikidata.org/wiki/Q5456330","display_name":"Fixed-priority pre-emptive scheduling","level":5,"score":0.2840999960899353},{"id":"https://openalex.org/C114073186","wikidata":"https://www.wikidata.org/wiki/Q2631895","display_name":"Automated planning and scheduling","level":2,"score":0.2676999866962433},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.259799987077713},{"id":"https://openalex.org/C127456818","wikidata":"https://www.wikidata.org/wiki/Q238879","display_name":"Rate-monotonic scheduling","level":4,"score":0.25440001487731934},{"id":"https://openalex.org/C175893541","wikidata":"https://www.wikidata.org/wiki/Q1196582","display_name":"Round-robin scheduling","level":4,"score":0.2508000135421753},{"id":"https://openalex.org/C107568181","wikidata":"https://www.wikidata.org/wiki/Q5319000","display_name":"Dynamic priority scheduling","level":3,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746467.3801520","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746467.3801520","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 ACM Southeast Conference","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3746467.3801520","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746467.3801520","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 ACM Southeast Conference","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2251939518","https://openalex.org/W2922395136","https://openalex.org/W3128607506","https://openalex.org/W3144271226","https://openalex.org/W3154028478","https://openalex.org/W4220917304","https://openalex.org/W4385481898"],"related_works":[],"abstract_inverted_index":{"Running":[0],"modern":[1],"AI":[2],"models":[3],"efficiently":[4],"on":[5,32,58,70,81,193],"laptops":[6,194],"is":[7],"challenging":[8],"due":[9,175],"to":[10,43,147,176],"limited":[11],"computing":[12],"resources":[13],"and":[14,49,102,107,116,122,144,178,195],"the":[15,34,64,154],"absence":[16],"of":[17,66],"dedicated":[18],"GPUs.":[19],"Although":[20],"deep":[21,78],"learning":[22,79],"workloads":[23],"are":[24],"highly":[25],"parallel,":[26],"CPU":[27,120,142],"inference":[28,72,80,135,151,192],"performance":[29,174],"depends":[30],"heavily":[31],"how":[33],"operating":[35],"system":[36],"schedules":[37],"threads.":[38],"Poor":[39],"scheduling":[40,68,87,131,179,187],"can":[41,172,189],"lead":[42],"thread":[44,97,139,163],"migration,":[45],"context":[46,124],"switching":[47],"overhead,":[48],"reduced":[50],"cache":[51],"locality.":[52],"While":[53],"most":[54],"existing":[55],"work":[56,76],"focuses":[57],"model":[59,155],"design":[60],"or":[61,156,165],"hardware":[62],"acceleration,":[63],"effect":[65],"OS-level":[67],"behavior":[69],"real-time":[71,191],"remains":[73],"underexplored.":[74],"This":[75],"evaluates":[77],"a":[82],"multi-core":[83],"laptop":[84],"under":[85],"multiple":[86,113],"configurations,":[88],"including":[89],"default":[90],"OS":[91,130],"scheduling,":[92],"increased":[93],"intra-op":[94],"parallelism,":[95],"runtime-managed":[96],"affinity,":[98],"manual":[99],"core":[100],"pinning,":[101],"oversubscription":[103,177],"scenarios.":[104],"Using":[105],"convolutional":[106],"transformer":[108],"classification":[109],"models,":[110],"we":[111],"benchmark":[112],"micro-batch":[114],"sizes":[115],"measure":[117],"latency,":[118],"throughput,":[119],"utilization,":[121],"voluntary":[123],"switches.":[125],"Our":[126],"results":[127],"show":[128],"that":[129,184],"decisions":[132],"significantly":[133],"affect":[134],"performance.":[136],"ONNX":[137],"Runtime":[138],"affinity":[140,171],"improves":[141],"utilization":[143],"delivers":[145],"five":[146],"eight":[148],"times":[149],"faster":[150],"without":[152,169],"modifying":[153],"retraining":[157],"it.":[158],"In":[159],"contrast,":[160],"simply":[161],"increasing":[162],"count":[164],"manually":[166],"pinning":[167],"processes":[168],"coordinated":[170],"degrade":[173],"overhead.":[180],"These":[181],"findings":[182],"demonstrate":[183],"lightweight":[185],"runtime":[186],"strategies":[188],"improve":[190],"other":[196],"CPU-based":[197],"edge":[198],"devices.":[199]},"counts_by_year":[],"updated_date":"2026-05-15T06:12:33.780692","created_date":"2026-05-15T00:00:00"}
