{"id":"https://openalex.org/W2904487346","doi":"https://doi.org/10.1145/3291058","title":"Bandwidth and Locality Aware Task-stealing for Manycore Architectures with Bandwidth-Asymmetric Memory","display_name":"Bandwidth and Locality Aware Task-stealing for Manycore Architectures with Bandwidth-Asymmetric Memory","publication_year":2018,"publication_date":"2018-12-08","ids":{"openalex":"https://openalex.org/W2904487346","doi":"https://doi.org/10.1145/3291058","mag":"2904487346"},"language":"en","primary_location":{"id":"doi:10.1145/3291058","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3291058","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3291058","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3291058","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063596525","display_name":"Han Zhao","orcid":"https://orcid.org/0000-0002-1561-5329"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Han Zhao","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100377840","display_name":"Quan Chen","orcid":"https://orcid.org/0000-0001-5832-0347"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Chen","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-5832-0347","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037125614","display_name":"Yuxian Qiu","orcid":"https://orcid.org/0000-0003-4040-0159"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxian Qiu","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101931991","display_name":"Ming Wu","orcid":"https://orcid.org/0000-0002-6993-1431"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming Wu","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101413554","display_name":"Yao Shen","orcid":"https://orcid.org/0000-0002-6744-1498"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yao Shen","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003939279","display_name":"Jingwen Leng","orcid":"https://orcid.org/0000-0002-5660-5493"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingwen Leng","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100323095","display_name":"Chao Li","orcid":"https://orcid.org/0000-0001-6218-4659"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Li","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039318240","display_name":"Minyi Guo","orcid":"https://orcid.org/0000-0003-0034-2302"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minyi Guo","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5063596525"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":1.4247,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.83367959,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"15","issue":"4","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.888886570930481},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5817962288856506},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.5444548726081848},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5097901225090027},{"id":"https://openalex.org/keywords/allocator","display_name":"Allocator","score":0.49698665738105774},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.4916747212409973},{"id":"https://openalex.org/keywords/locality-of-reference","display_name":"Locality of reference","score":0.4702153503894806},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3728017508983612},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.24534839391708374},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.13514477014541626}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.888886570930481},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5817962288856506},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5444548726081848},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5097901225090027},{"id":"https://openalex.org/C162262903","wikidata":"https://www.wikidata.org/wiki/Q343527","display_name":"Allocator","level":2,"score":0.49698665738105774},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.4916747212409973},{"id":"https://openalex.org/C27602214","wikidata":"https://www.wikidata.org/wiki/Q1868547","display_name":"Locality of reference","level":3,"score":0.4702153503894806},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3728017508983612},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.24534839391708374},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.13514477014541626},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3291058","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3291058","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3291058","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3291058","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3291058","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3291058","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G129499350","display_name":null,"funder_award_id":"61632017","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1423370025","display_name":null,"funder_award_id":"61602301","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3758062460","display_name":null,"funder_award_id":"61602301, 61632017","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2904487346.pdf","grobid_xml":"https://content.openalex.org/works/W2904487346.grobid-xml"},"referenced_works_count":54,"referenced_works":["https://openalex.org/W57462620","https://openalex.org/W320927244","https://openalex.org/W1545054551","https://openalex.org/W1579925828","https://openalex.org/W1657789554","https://openalex.org/W1773176621","https://openalex.org/W1914552515","https://openalex.org/W1943385891","https://openalex.org/W1982003698","https://openalex.org/W1986864222","https://openalex.org/W1994909161","https://openalex.org/W2000364684","https://openalex.org/W2031866163","https://openalex.org/W2032401773","https://openalex.org/W2037574360","https://openalex.org/W2043013658","https://openalex.org/W2055099017","https://openalex.org/W2063270401","https://openalex.org/W2078536718","https://openalex.org/W2079577430","https://openalex.org/W2084423328","https://openalex.org/W2087085699","https://openalex.org/W2089898547","https://openalex.org/W2096898433","https://openalex.org/W2100038678","https://openalex.org/W2108801243","https://openalex.org/W2117476908","https://openalex.org/W2119296805","https://openalex.org/W2119860027","https://openalex.org/W2121893797","https://openalex.org/W2129829929","https://openalex.org/W2130374657","https://openalex.org/W2146585781","https://openalex.org/W2148443252","https://openalex.org/W2153185479","https://openalex.org/W2157124218","https://openalex.org/W2157802978","https://openalex.org/W2166254818","https://openalex.org/W2171226522","https://openalex.org/W2172139828","https://openalex.org/W2261536765","https://openalex.org/W2461043886","https://openalex.org/W2485412435","https://openalex.org/W2508028078","https://openalex.org/W2512594982","https://openalex.org/W2554695184","https://openalex.org/W2609137082","https://openalex.org/W2733054703","https://openalex.org/W2746871167","https://openalex.org/W2756273951","https://openalex.org/W2761236734","https://openalex.org/W3145624740","https://openalex.org/W4245454516","https://openalex.org/W4256158364"],"related_works":["https://openalex.org/W2253315624","https://openalex.org/W1555349535","https://openalex.org/W2583128298","https://openalex.org/W2053359564","https://openalex.org/W2161159383","https://openalex.org/W1495260638","https://openalex.org/W1511204342","https://openalex.org/W2369125128","https://openalex.org/W2369223577","https://openalex.org/W2010020348"],"abstract_inverted_index":{"Parallel":[0],"computers":[1],"now":[2],"start":[3],"to":[4,93,102,110,148],"adopt":[5],"Bandwidth-Asymmetric":[6],"Memory":[7,19],"architecture":[8],"that":[9,133,138],"consists":[10,59],"of":[11,60,144],"traditional":[12,152],"DRAM":[13],"memory":[14,23,41,104,107,136],"and":[15,34,52,69,78],"new":[16],"High":[17],"Bandwidth":[18,51],"(HBM)":[20],"for":[21],"high":[22],"bandwidth.":[24,112],"However,":[25],"existing":[26],"task":[27],"schedulers":[28],"suffer":[29],"from":[30],"low":[31],"bandwidth":[32],"usage":[33,87],"poor":[35],"data":[36,63,80,83,94,101,117],"locality":[37,118],"problems":[38],"in":[39],"bandwidth-asymmetric":[40,135],"architectures.":[42],"To":[43],"solve":[44],"the":[45,82,97,141],"two":[46],"problems,":[47],"we":[48],"propose":[49],"a":[50,65,70,122],"Locality":[53],"Aware":[54],"Task-stealing":[55],"(BATS)":[56],"system,":[57],"which":[58],"an":[61,128],"HBM-aware":[62],"allocator,":[64],"bandwidth-aware":[66],"traffic":[67,98,105],"balancer,":[68],"hierarchical":[71,114],"task-stealing":[72,153],"scheduler.":[73],"Leveraging":[74],"compile-time":[75],"code":[76],"transformation":[77],"run-time":[79],"distribution,":[81],"allocator":[84],"enables":[85],"HBM":[86],"automatically":[88],"without":[89,121],"user":[90],"interference.":[91],"According":[92],"access":[95],"hotness,":[96],"balancer":[99],"migrates":[100],"balance":[103],"across":[106],"nodes":[108],"proportional":[109],"their":[111],"The":[113],"scheduler":[115],"improves":[116],"at":[119],"runtime":[120],"priori":[123],"program":[124],"knowledge.":[125],"Experiments":[126],"on":[127],"Intel":[129],"Knights":[130],"Landing":[131],"server":[132],"adopts":[134],"show":[137],"BATS":[139],"reduces":[140],"execution":[142],"time":[143],"memory-bound":[145],"programs":[146],"up":[147],"83.5%":[149],"compared":[150],"with":[151],"schedulers.":[154]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
