{"id":"https://openalex.org/W4210250808","doi":"https://doi.org/10.1109/tc.2022.3145164","title":"ParaX : Bandwidth-Efficient Instance Assignment for DL on Multi-NUMA Many-Core CPUs","display_name":"ParaX : Bandwidth-Efficient Instance Assignment for DL on Multi-NUMA Many-Core CPUs","publication_year":2022,"publication_date":"2022-01-31","ids":{"openalex":"https://openalex.org/W4210250808","doi":"https://doi.org/10.1109/tc.2022.3145164"},"language":"en","primary_location":{"id":"doi:10.1109/tc.2022.3145164","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2022.3145164","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100395351","display_name":"Yiming Zhang","orcid":"https://orcid.org/0000-0001-6450-8485"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yiming Zhang","raw_affiliation_strings":["NICEX Lab, School of Informatics, Xiamen University, Xiamen, Fujian, China"],"affiliations":[{"raw_affiliation_string":"NICEX Lab, School of Informatics, Xiamen University, Xiamen, Fujian, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071843722","display_name":"Lujia Yin","orcid":"https://orcid.org/0000-0003-2916-0756"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lujia Yin","raw_affiliation_strings":["National University of Defense Technology, Changsha, Hunan, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100440903","display_name":"Dongsheng Li","orcid":"https://orcid.org/0000-0001-9743-2034"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongsheng Li","raw_affiliation_strings":["National University of Defense Technology, Changsha, Hunan, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036258932","display_name":"Yuxing Peng","orcid":"https://orcid.org/0000-0003-1295-0911"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxing Peng","raw_affiliation_strings":["National University of Defense Technology, Changsha, Hunan, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100625354","display_name":"Kai L\u00fc","orcid":"https://orcid.org/0000-0002-6378-7002"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Lu","raw_affiliation_strings":["National University of Defense Technology, Changsha, Hunan, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, Hunan, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100395351"],"corresponding_institution_ids":["https://openalex.org/I191208505"],"apc_list":null,"apc_paid":null,"fwci":0.1007,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.33438938,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"71","issue":"11","first_page":"3032","last_page":"3046"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8432486057281494},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6661248803138733},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6418766975402832},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.5208778381347656},{"id":"https://openalex.org/keywords/central-processing-unit","display_name":"Central processing unit","score":0.5020751953125},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4910855293273926},{"id":"https://openalex.org/keywords/cpu-shielding","display_name":"CPU shielding","score":0.4716208279132843},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.4688877761363983},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.42411568760871887},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.34648454189300537}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8432486057281494},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6661248803138733},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6418766975402832},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.5208778381347656},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.5020751953125},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4910855293273926},{"id":"https://openalex.org/C180613757","wikidata":"https://www.wikidata.org/wiki/Q5013757","display_name":"CPU shielding","level":3,"score":0.4716208279132843},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.4688877761363983},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.42411568760871887},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.34648454189300537},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tc.2022.3145164","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2022.3145164","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5400000214576721}],"awards":[{"id":"https://openalex.org/G4765491843","display_name":null,"funder_award_id":"61872376","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6231216505","display_name":null,"funder_award_id":"61772541","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W1598866093","https://openalex.org/W1788418780","https://openalex.org/W1836465849","https://openalex.org/W1976251241","https://openalex.org/W2064675550","https://openalex.org/W2083842231","https://openalex.org/W2108598243","https://openalex.org/W2143735977","https://openalex.org/W2155893237","https://openalex.org/W2186615578","https://openalex.org/W2194775991","https://openalex.org/W2316776689","https://openalex.org/W2323909431","https://openalex.org/W2405920868","https://openalex.org/W2525778437","https://openalex.org/W2604514113","https://openalex.org/W2605350416","https://openalex.org/W2622263826","https://openalex.org/W2794670651","https://openalex.org/W2897884253","https://openalex.org/W2916187335","https://openalex.org/W2925231804","https://openalex.org/W2950449148","https://openalex.org/W2962747323","https://openalex.org/W2963016543","https://openalex.org/W2963122961","https://openalex.org/W2963959597","https://openalex.org/W2971874466","https://openalex.org/W2983865192","https://openalex.org/W2990725985","https://openalex.org/W3001283725","https://openalex.org/W3016842236","https://openalex.org/W3150053915","https://openalex.org/W4297775537","https://openalex.org/W4301239768","https://openalex.org/W6635810480","https://openalex.org/W6638667902","https://openalex.org/W6668382375","https://openalex.org/W6686509673","https://openalex.org/W6713134421","https://openalex.org/W6714058667","https://openalex.org/W6726983090","https://openalex.org/W6727690538","https://openalex.org/W6737664043","https://openalex.org/W6739622702","https://openalex.org/W6748645090","https://openalex.org/W6751349269","https://openalex.org/W6759948036","https://openalex.org/W6762871624","https://openalex.org/W6764041785"],"related_works":["https://openalex.org/W2387982802","https://openalex.org/W1896942098","https://openalex.org/W1991061790","https://openalex.org/W2400763249","https://openalex.org/W1482063109","https://openalex.org/W2043940072","https://openalex.org/W2249399447","https://openalex.org/W2473478803","https://openalex.org/W2729363167","https://openalex.org/W2115229350"],"abstract_inverted_index":{"Commercial":[0],"clouds":[1],"now":[2],"heavily":[3],"use":[4],"CPUs":[5,16,36,60,146],"in":[6,62,118],"DL":[7,46,63,139],"(deep":[8],"learning)":[9],"because":[10],"there":[11],"are":[12,88],"large":[13,53],"numbers":[14],"of":[15,55,71,79,102,138,168,183,224],"which":[17,94,134,193,215],"would":[18],"otherwise":[19],"sit":[20],"idle":[21],"during":[22],"off-peak":[23],"periods.":[24],"Following":[25],"the":[26,67,76,80,84,119,136,170,177,181,184,196,200,222,247],"trend,":[27],"CPU":[28,56,116,153,165,241],"vendors":[29],"have":[30,229],"not":[31],"only":[32],"released":[33],"high-performance":[34],"many-core":[35,59,145],"but":[37],"also":[38],"developed":[39],"efficient":[40],"math":[41],"kernel":[42],"libraries.":[43],"However,":[44],"current":[45,92],"platforms":[47,93],"cannot":[48],"scale":[49],"well":[50],"to":[51,105,159,163,169,175,203,219],"a":[52,106,129,208,237],"number":[54],"cores,":[57],"making":[58],"inefficient":[61],"computation.":[64],"We":[65,228],"analyze":[66],"memory":[68,112,143,218],"access":[69],"patterns":[70],"various":[72],"layers":[73,121,198],"and":[74,115,124,152,206,257],"identify":[75],"root":[77],"cause":[78,110],"low":[81],"scalability,":[82],"i.e.,":[83],"per-layer":[85,178],"barriers":[86,109,179],"that":[87,243],"implicitly":[89],"imposed":[90],"by":[91,147,261],"assign":[95,160],"one":[96,100,161],"single":[97],"instance":[98,162],"(i.e.,":[99],"batch":[101],"input":[103],"data)":[104],"CPU.":[107],"The":[108],"severe":[111],"bandwidth":[113,150],"contention":[114,151],"starvation":[117],"access-intensive":[120,197],"(like":[122],"activation":[123],"BN).":[125],"This":[126],"paper":[127],"presents":[128],"novel":[130],"approach":[131],"called":[132],"ParaX,":[133],"boosts":[135],"performance":[137],"on":[140,180,232,236],"multi-NUMA":[141],"(non-uniform":[142],"access)":[144],"effectively":[148],"alleviating":[149],"starvation.":[154],"Our":[155],"key":[156],"idea":[157],"is":[158],"each":[164],"core":[166],"instead":[167],"entire":[171],"CPU,":[172],"so":[173],"as":[174],"remove":[176],"executions":[182],"many":[185],"cores.":[186],"ParaX":[187,231,244],"designs":[188],"an":[189],"ultralight":[190],"scheduling":[191],"policy":[192],"sufficiently":[194],"overlaps":[195],"with":[199],"compute-intensive":[201],"ones":[202],"avoid":[204],"contention,":[205],"proposes":[207],"NUMA-aware":[209],"gradient":[210],"server":[211],"mechanism":[212],"for":[213,250],"training":[214],"leverages":[216],"shared":[217],"substantially":[220],"reduce":[221],"overhead":[223],"per-iteration":[225],"parameter":[226],"synchronization.":[227],"implemented":[230],"MXNet.":[233],"Extensive":[234],"evaluation":[235],"two-NUMA":[238],"Intel":[239],"8280":[240],"shows":[242],"significantly":[245],"improves":[246],"training/inference":[248],"throughput":[249],"all":[251],"tested":[252],"models":[253],"(for":[254],"image":[255],"recognition":[256],"natural":[258],"language":[259],"processing)":[260],"<inline-formula><tex-math":[262],"notation=\"LaTeX\">$1.73\\times":[263],"\\sim":[264],"2.93{\\times}$</tex-math></inline-formula>":[265],".":[266]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
