{"id":"https://openalex.org/W2996377014","doi":"https://doi.org/10.1145/3330152","title":"Thread Batching for High-performance Energy-efficient GPU Memory Design","display_name":"Thread Batching for High-performance Energy-efficient GPU Memory Design","publication_year":2019,"publication_date":"2019-10-31","ids":{"openalex":"https://openalex.org/W2996377014","doi":"https://doi.org/10.1145/3330152","mag":"2996377014"},"language":"en","primary_location":{"id":"doi:10.1145/3330152","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3330152","pdf_url":null,"source":{"id":"https://openalex.org/S96198239","display_name":"ACM Journal on Emerging Technologies in Computing Systems","issn_l":"1550-4832","issn":["1550-4832","1550-4840"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Journal on Emerging Technologies in Computing Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100451305","display_name":"Bing Li","orcid":"https://orcid.org/0000-0003-0732-2267"},"institutions":[{"id":"https://openalex.org/I4210163870","display_name":"United States Army Research Office","ror":"https://ror.org/05epdh915","country_code":"US","type":"government","lineage":["https://openalex.org/I1304082316","https://openalex.org/I1330347796","https://openalex.org/I166416128","https://openalex.org/I2802705668","https://openalex.org/I4210154437","https://openalex.org/I4210163870"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bing Li","raw_affiliation_strings":["Duke University, USA and Army Research Office, Research Triangle Park, USA"],"affiliations":[{"raw_affiliation_string":"Duke University, USA and Army Research Office, Research Triangle Park, USA","institution_ids":["https://openalex.org/I4210163870"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005196528","display_name":"Mengjie Mao","orcid":null},"institutions":[{"id":"https://openalex.org/I887054487","display_name":"MathWorks (United States)","ror":"https://ror.org/01n8qtk87","country_code":"US","type":"company","lineage":["https://openalex.org/I887054487"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mengjie Mao","raw_affiliation_strings":["MathWorks Inc., USA"],"affiliations":[{"raw_affiliation_string":"MathWorks Inc., USA","institution_ids":["https://openalex.org/I887054487"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100338931","display_name":"Xiaoxiao Liu","orcid":"https://orcid.org/0000-0002-1313-2761"},"institutions":[{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaoxiao Liu","raw_affiliation_strings":["AMD, USA"],"affiliations":[{"raw_affiliation_string":"AMD, USA","institution_ids":["https://openalex.org/I4210137977"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100735051","display_name":"Tao Liu","orcid":"https://orcid.org/0000-0002-9653-4108"},"institutions":[{"id":"https://openalex.org/I19700959","display_name":"Florida International University","ror":"https://ror.org/02gz6gg07","country_code":"US","type":"education","lineage":["https://openalex.org/I19700959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tao Liu","raw_affiliation_strings":["Florida International University, Miami, FL, USA"],"affiliations":[{"raw_affiliation_string":"Florida International University, Miami, FL, USA","institution_ids":["https://openalex.org/I19700959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100323067","display_name":"Zihao Liu","orcid":"https://orcid.org/0000-0001-5306-6626"},"institutions":[{"id":"https://openalex.org/I19700959","display_name":"Florida International University","ror":"https://ror.org/02gz6gg07","country_code":"US","type":"education","lineage":["https://openalex.org/I19700959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zihao Liu","raw_affiliation_strings":["Florida International University, Miami, FL, USA"],"affiliations":[{"raw_affiliation_string":"Florida International University, Miami, FL, USA","institution_ids":["https://openalex.org/I19700959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067226050","display_name":"Wujie Wen","orcid":"https://orcid.org/0000-0003-0011-0675"},"institutions":[{"id":"https://openalex.org/I19700959","display_name":"Florida International University","ror":"https://ror.org/02gz6gg07","country_code":"US","type":"education","lineage":["https://openalex.org/I19700959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wujie Wen","raw_affiliation_strings":["Florida International University, Miami, FL, USA"],"affiliations":[{"raw_affiliation_string":"Florida International University, Miami, FL, USA","institution_ids":["https://openalex.org/I19700959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058073627","display_name":"Yiran Chen","orcid":"https://orcid.org/0000-0002-1486-8412"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yiran Chen","raw_affiliation_strings":["Duke University, Durham, North Carolina, USA"],"affiliations":[{"raw_affiliation_string":"Duke University, Durham, North Carolina, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100429403","display_name":"Hai Li","orcid":"https://orcid.org/0000-0003-3228-6544"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hai (Helen) Li","raw_affiliation_strings":["Duke University, Durham, North Carolina, USA"],"affiliations":[{"raw_affiliation_string":"Duke University, Durham, North Carolina, USA","institution_ids":["https://openalex.org/I170897317"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5100451305"],"corresponding_institution_ids":["https://openalex.org/I4210163870"],"apc_list":null,"apc_paid":null,"fwci":0.246,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.53646099,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"15","issue":"4","first_page":"1","last_page":"21"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8488121032714844},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7295544147491455},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.7053836584091187},{"id":"https://openalex.org/keywords/uniform-memory-access","display_name":"Uniform memory access","score":0.4544821083545685},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.41474977135658264},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.4106401801109314},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.39460983872413635},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3770972490310669},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.22573530673980713},{"id":"https://openalex.org/keywords/semiconductor-memory","display_name":"Semiconductor memory","score":0.15812087059020996}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8488121032714844},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7295544147491455},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.7053836584091187},{"id":"https://openalex.org/C51290061","wikidata":"https://www.wikidata.org/wiki/Q1936765","display_name":"Uniform memory access","level":4,"score":0.4544821083545685},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.41474977135658264},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.4106401801109314},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.39460983872413635},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3770972490310669},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.22573530673980713},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.15812087059020996}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3330152","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3330152","pdf_url":null,"source":{"id":"https://openalex.org/S96198239","display_name":"ACM Journal on Emerging Technologies in Computing Systems","issn_l":"1550-4832","issn":["1550-4832","1550-4840"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Journal on Emerging Technologies in Computing Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.9100000262260437,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G1557512144","display_name":null,"funder_award_id":"SC0017030","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G5343582487","display_name":null,"funder_award_id":"1725456 and 1615475","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1966668827","https://openalex.org/W1979866689","https://openalex.org/W2020572638","https://openalex.org/W2048242615","https://openalex.org/W2059301531","https://openalex.org/W2073899127","https://openalex.org/W2079248286","https://openalex.org/W2080592089","https://openalex.org/W2098040113","https://openalex.org/W2106562406","https://openalex.org/W2124237318","https://openalex.org/W2129232868","https://openalex.org/W2129381159","https://openalex.org/W2129817042","https://openalex.org/W2136800915","https://openalex.org/W2153882937","https://openalex.org/W2159908132","https://openalex.org/W2162838417","https://openalex.org/W2232645663","https://openalex.org/W2273440736","https://openalex.org/W3112784795","https://openalex.org/W4205474951","https://openalex.org/W4232973962","https://openalex.org/W4239813889","https://openalex.org/W4242958069","https://openalex.org/W4247176905","https://openalex.org/W4300133801"],"related_works":["https://openalex.org/W2023832055","https://openalex.org/W2082485924","https://openalex.org/W1979099492","https://openalex.org/W1837558792","https://openalex.org/W2098482419","https://openalex.org/W2070983336","https://openalex.org/W4254638342","https://openalex.org/W4384344236","https://openalex.org/W2465501459","https://openalex.org/W2026512611"],"abstract_inverted_index":{"Massive":[0],"multi-threading":[1],"in":[2,14],"GPU":[3,18,71,142,181,222],"imposes":[4],"tremendous":[5],"pressure":[6],"on":[7,151,198],"memory":[8,23,25,47,66,72,108,143,152],"subsystems.":[9],"Due":[10],"to":[11,44,69,99,105,117,120,139,147,170],"rapid":[12],"growth":[13],"thread-level":[15],"parallelism":[16],"of":[17,29,57,87,163,189,220],"and":[19,32,49,54,93,146,154,165,174,223],"slowly":[20],"improved":[21],"peak":[22],"bandwidth,":[24],"becomes":[26],"a":[27,62,90,95,131,199,211],"bottleneck":[28],"GPU\u2019s":[30],"performance":[31,53,172,187],"energy":[33,55,177],"efficiency.":[34],"In":[35,75],"this":[36],"article,":[37],"we":[38,60],"propose":[39,61],"an":[40,118],"integrated":[41],"architectural":[42],"scheme":[43,136],"optimize":[45],"the":[46,52,84,106,114,126,141,149,161,186,190,217],"accesses":[48],"therefore":[50],"boost":[51],"efficiency":[56],"GPU.":[58],"First,":[59],"thread":[63,80,91,115,128,132],"batch":[64,92,116],"enabled":[65],"partitioning":[67],"(TEMP)":[68],"improve":[70,140],"access":[73,144],"parallelism.":[74],"particular,":[76],"TEMP":[77,112,164],"groups":[78],"multiple":[79],"blocks":[81],"that":[82,160,202,210],"share":[83],"same":[85],"set":[86],"pages":[88],"into":[89],"applies":[94],"page":[96],"coloring":[97],"mechanism":[98],"bound":[100],"each":[101],"stream":[102],"multiprocessor":[103],"(SM)":[104],"dedicated":[107],"banks.":[109],"After":[110],"that,":[111],"dispatches":[113],"SM":[119],"ensure":[121,216],"high-parallel":[122],"memory-access":[123],"streaming":[124],"from":[125],"different":[127],"blocks.":[129],"Second,":[130],"batch-aware":[133],"scheduling":[134],"(TBAS)":[135],"is":[137],"introduced":[138],"locality":[145],"reduce":[148],"contention":[150],"controllers":[153],"interconnection":[155],"networks.":[156],"Experimental":[157],"results":[158,208],"show":[159,209],"integration":[162],"TBAS":[166],"can":[167,214],"achieve":[168],"up":[169],"10.3%":[171],"improvement":[173],"11.3%":[175],"DRAM":[176],"reduction":[178],"across":[179],"diverse":[180],"applications.":[182,225],"We":[183],"also":[184],"evaluate":[185],"interference":[188],"mixed":[191],"CPU+GPU":[192],"workloads":[193],"when":[194],"they":[195],"are":[196],"run":[197],"heterogeneous":[200],"system":[201],"employs":[203],"our":[204],"proposed":[205],"schemes.":[206],"Our":[207],"simple":[212],"solution":[213],"effectively":[215],"efficient":[218],"execution":[219],"both":[221],"CPU":[224]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
