{"id":"https://openalex.org/W4318541666","doi":"https://doi.org/10.1145/3575693.3575745","title":"NUBA: Non-Uniform Bandwidth GPUs","display_name":"NUBA: Non-Uniform Bandwidth GPUs","publication_year":2023,"publication_date":"2023-01-27","ids":{"openalex":"https://openalex.org/W4318541666","doi":"https://doi.org/10.1145/3575693.3575745"},"language":"en","primary_location":{"id":"doi:10.1145/3575693.3575745","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3575693.3575745","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hdl.handle.net/11250/3105140","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072444630","display_name":"Xia Zhao","orcid":"https://orcid.org/0000-0001-6479-9200"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xia Zhao","raw_affiliation_strings":["Academy of Military Sciences, China"],"affiliations":[{"raw_affiliation_string":"Academy of Military Sciences, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067108530","display_name":"Magnus Jahre","orcid":"https://orcid.org/0000-0001-9147-5228"},"institutions":[{"id":"https://openalex.org/I4210165875","display_name":"NTNU Samfunnsforskning","ror":"https://ror.org/05pv30e80","country_code":"NO","type":"facility","lineage":["https://openalex.org/I4210165875"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Magnus Jahre","raw_affiliation_strings":["NTNU, Norway"],"affiliations":[{"raw_affiliation_string":"NTNU, Norway","institution_ids":["https://openalex.org/I4210165875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065371644","display_name":"Yuhua Tang","orcid":"https://orcid.org/0000-0002-4956-3379"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhua Tang","raw_affiliation_strings":["National University of Defense Technology, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049705266","display_name":"Guangda Zhang","orcid":"https://orcid.org/0000-0003-4732-9674"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guangda Zhang","raw_affiliation_strings":["Academy of Military Sciences, China"],"affiliations":[{"raw_affiliation_string":"Academy of Military Sciences, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033119975","display_name":"Lieven Eeckhout","orcid":"https://orcid.org/0000-0001-8792-4473"},"institutions":[{"id":"https://openalex.org/I2801227569","display_name":"Ghent University Hospital","ror":"https://ror.org/00xmkp704","country_code":"BE","type":"healthcare","lineage":["https://openalex.org/I2801227569"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Lieven Eeckhout","raw_affiliation_strings":["Ghent University, Belgium"],"affiliations":[{"raw_affiliation_string":"Ghent University, Belgium","institution_ids":["https://openalex.org/I2801227569"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5072444630"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.6114,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.93298812,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"544","last_page":"559"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5711086988449097},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.5466665625572205},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.48067960143089294},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3401109576225281},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.1660948395729065}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5711086988449097},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5466665625572205},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.48067960143089294},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3401109576225281},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.1660948395729065}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3575693.3575745","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3575693.3575745","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},{"id":"pmh:oai:archive.ugent.be:01HQ0WT764375CFNS64KFDW3H3","is_oa":false,"landing_page_url":"http://hdl.handle.net/1854/LU-01HQ0WT764375CFNS64KFDW3H3","pdf_url":null,"source":{"id":"https://openalex.org/S4306400478","display_name":"Ghent University Academic Bibliography (Ghent University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I32597200","host_organization_name":"Ghent University","host_organization_lineage":["https://openalex.org/I32597200"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISBN: 9781450399166","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:ntnuopen.ntnu.no:11250/3105140","is_oa":true,"landing_page_url":"https://hdl.handle.net/11250/3105140","pdf_url":null,"source":{"id":"https://openalex.org/S4306401716","display_name":"Duo Research Archive (University of Oslo)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I184942183","host_organization_name":"University of Oslo","host_organization_lineage":["https://openalex.org/I184942183"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"544-559","raw_type":"info:eu-repo/semantics/bookPart"}],"best_oa_location":{"id":"pmh:oai:ntnuopen.ntnu.no:11250/3105140","is_oa":true,"landing_page_url":"https://hdl.handle.net/11250/3105140","pdf_url":null,"source":{"id":"https://openalex.org/S4306401716","display_name":"Duo Research Archive (University of Oslo)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I184942183","host_organization_name":"University of Oslo","host_organization_lineage":["https://openalex.org/I184942183"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"544-559","raw_type":"info:eu-repo/semantics/bookPart"},"sustainable_development_goals":[{"score":0.5699999928474426,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1313632636","display_name":null,"funder_award_id":"741097","funder_id":"https://openalex.org/F4320334678","funder_display_name":"European Research Council"},{"id":"https://openalex.org/G2730306035","display_name":null,"funder_award_id":"BOF-GOA","funder_id":"https://openalex.org/F4320322603","funder_display_name":"Universiteit Gent"},{"id":"https://openalex.org/G3085993365","display_name":null,"funder_award_id":"(Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3341358030","display_name":null,"funder_award_id":"","funder_id":"https://openalex.org/F4320334978","funder_display_name":"Beijing Nova Program"},{"id":"https://openalex.org/G3748656914","display_name":null,"funder_award_id":"Norway","funder_id":"https://openalex.org/F4320323299","funder_display_name":"Norges Forskningsr\u00e5d"},{"id":"https://openalex.org/G37568934","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4425063659","display_name":null,"funder_award_id":"62102438","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5524483453","display_name":null,"funder_award_id":"286596","funder_id":"https://openalex.org/F4320323299","funder_display_name":"Norges Forskningsr\u00e5d"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322603","display_name":"Universiteit Gent","ror":"https://ror.org/00cv9y106"},{"id":"https://openalex.org/F4320323299","display_name":"Norges Forskningsr\u00e5d","ror":"https://ror.org/00epmv149"},{"id":"https://openalex.org/F4320334678","display_name":"European Research Council","ror":"https://ror.org/0472cxd90"},{"id":"https://openalex.org/F4320334978","display_name":"Beijing Nova Program","ror":"https://ror.org/034k14f91"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W1985539519","https://openalex.org/W1985818188","https://openalex.org/W1990550838","https://openalex.org/W1991357793","https://openalex.org/W2006071227","https://openalex.org/W2033571023","https://openalex.org/W2034861439","https://openalex.org/W2067540437","https://openalex.org/W2080592089","https://openalex.org/W2081597309","https://openalex.org/W2085844157","https://openalex.org/W2089229046","https://openalex.org/W2091905905","https://openalex.org/W2093043622","https://openalex.org/W2096169320","https://openalex.org/W2098701183","https://openalex.org/W2100926301","https://openalex.org/W2106342588","https://openalex.org/W2116464699","https://openalex.org/W2122797462","https://openalex.org/W2124142472","https://openalex.org/W2128120785","https://openalex.org/W2129817042","https://openalex.org/W2140455011","https://openalex.org/W2143515003","https://openalex.org/W2150446161","https://openalex.org/W2151233837","https://openalex.org/W2157134596","https://openalex.org/W2157802978","https://openalex.org/W2161364792","https://openalex.org/W2175048909","https://openalex.org/W2416722775","https://openalex.org/W2496508069","https://openalex.org/W2625200202","https://openalex.org/W2626312854","https://openalex.org/W2761710529","https://openalex.org/W2764065518","https://openalex.org/W2793599434","https://openalex.org/W2795081729","https://openalex.org/W2884108789","https://openalex.org/W2885000039","https://openalex.org/W2886156724","https://openalex.org/W2903659818","https://openalex.org/W2904412652","https://openalex.org/W2952269378","https://openalex.org/W2978742436","https://openalex.org/W3017188964","https://openalex.org/W3017302221","https://openalex.org/W3089469538","https://openalex.org/W3099807387","https://openalex.org/W3134495297","https://openalex.org/W3159607817","https://openalex.org/W4236010665","https://openalex.org/W4238569137","https://openalex.org/W4252234101","https://openalex.org/W4253046107"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W2350741829","https://openalex.org/W2530322880","https://openalex.org/W2405661381"],"abstract_inverted_index":{"The":[0,211],"parallel":[1],"execution":[2],"model":[3],"of":[4,10,12,50,144,197],"GPUs":[5,74,93,199],"enables":[6,224],"scaling":[7],"to":[8,44,109,168,189,192,228,296,301,315],"hundreds":[9],"thousands":[11],"threads,":[13],"which":[14,223,244],"is":[15,104,216,313],"a":[16,48,95,107,116,129,149,158,172,184],"key":[17,102],"capability":[18],"that":[19,105,146,263],"many":[20],"modern":[21],"high-performance":[22],"applications":[23],"exploit.":[24],"GPU":[25,37,130,142,226,286],"vendors":[26],"are":[27,75],"hence":[28,162],"increasing":[29],"the":[30,42,111,124,164,169,195,206,225,310],"compute":[31],"and":[32,58,64,68,88,100,119,152,183,208,282,292,298,304,329],"memory":[33,59,159],"resources":[34],"with":[35,179,249],"every":[36],"generation":[38],"\u2014":[39,161,182,188],"resulting":[40],"in":[41,71,231],"need":[43],"efficiently":[45],"stitch":[46],"together":[47],"plethora":[49],"Symmetric":[51],"Multiprocessors":[52],"(SMs),":[53],"Last-Level":[54],"Cache":[55],"(LLC)":[56],"slices":[57,154,270],"controllers":[60],"while":[61,234,318],"maximizing":[62],"bandwidth":[63,84,114,167],"keeping":[65],"power":[66,118,325],"consumption":[67,326],"design":[69],"complexity":[70,120],"check.":[72],"Conventional":[73],"Uniform":[76],"Bandwidth":[77,126],"Architectures":[78],"(UBAs)":[79],"as":[80,155,157],"they":[81,175],"provide":[82],"equal":[83],"between":[85,186],"all":[86,89],"SMs":[87,151,170],"LLC":[90,112,137,153,166,269],"slices.":[91],"UBA":[92,306],"require":[94],"uniform":[96],"high-bandwidth":[97],"Network-on-Chip":[98],"(NoC),":[99],"our":[101,217,284],"observation":[103],"provisioning":[106],"NoC":[108,185,324],"match":[110],"slice":[113,138],"incurs":[115],"hefty":[117],"overhead.":[121],"We":[122],"propose":[123,240],"Non-Uniform":[125],"Architecture":[127],"(NUBA),":[128],"system":[131,204,213],"architecture":[132],"aimed":[133],"at":[134,252],"fully":[135],"utilizing":[136],"bandwidth.":[139],"A":[140],"NUBA":[141,198,285,311,322],"consists":[143],"partitions":[145,187,233],"each":[147],"feature":[148],"few":[150],"well":[156],"controller":[160],"exposing":[163],"complete":[165],"within":[171],"partition":[173],"since":[174],"can":[176,273],"be":[177,274],"connected":[178],"point-to-point":[180],"links":[181],"enable":[190],"access":[191],"remote":[193],"data.Exploiting":[194],"potential":[196],"however":[200],"requires":[201],"carefully":[202],"co-designing":[203],"software,":[205],"compiler":[207],"architectural":[209,261],"policies.":[210],"critical":[212],"software":[214],"component":[215],"Local-And-Balanced":[218],"(LAB)":[219],"page":[220],"placement":[221],"policy":[222],"driver":[227],"place":[229],"data":[230,248,267],"local":[232],"avoiding":[235],"load":[236],"imbalance.":[237],"Moreover,":[238],"we":[239],"Model-Driven":[241],"Replication":[242],"(MDR)":[243],"identifies":[245],"read-only":[246,265],"shared":[247,266],"data-flow":[250],"analysis":[251],"compile":[253],"time.":[254],"At":[255],"run":[256],"time,":[257],"MDR":[258],"leverages":[259],"an":[260],"mechanism":[262],"replicates":[264],"across":[268],"when":[271],"this":[272],"done":[275],"without":[276],"pressuring":[277],"cache":[278],"capacity.":[279],"With":[280],"LAB":[281],"MDR,":[283],"improves":[287],"average":[288],"performance":[289],"by":[290,327],"23.1%":[291],"22.2%":[293],"(and":[294],"up":[295],"183.9%":[297],"182.4%)":[299],"compared":[300],"iso-resource":[302],"memory-side":[303],"SM-side":[305],"GPUs,":[307],"respectively.":[308,331],"When":[309],"concept":[312],"leveraged":[314],"reduce":[316],"overhead":[317],"maintaining":[319],"similar":[320],"performance,":[321],"reduces":[323],"12.1\u00d7":[328],"9.4\u00d7,":[330]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
