{"id":"https://openalex.org/W4409133360","doi":"https://doi.org/10.1109/hpec62836.2024.10938416","title":"Benchmarking Thread Block Cluster","display_name":"Benchmarking Thread Block Cluster","publication_year":2024,"publication_date":"2024-09-23","ids":{"openalex":"https://openalex.org/W4409133360","doi":"https://doi.org/10.1109/hpec62836.2024.10938416"},"language":"en","primary_location":{"id":"doi:10.1109/hpec62836.2024.10938416","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec62836.2024.10938416","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116901641","display_name":"Tim L\u00fchnen","orcid":null},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tim L\u00fchnen","raw_affiliation_strings":["Hamburg University of Technology,Massively Parallel Systems Group,Hamburg,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hamburg University of Technology,Massively Parallel Systems Group,Hamburg,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116901642","display_name":"Tobias Marschner","orcid":null},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tobias Marschner","raw_affiliation_strings":["Hamburg University of Technology,Massively Parallel Systems Group,Hamburg,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hamburg University of Technology,Massively Parallel Systems Group,Hamburg,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083072467","display_name":"Sohan Lal","orcid":"https://orcid.org/0000-0002-2325-1705"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sohan Lal","raw_affiliation_strings":["Hamburg University of Technology,Massively Parallel Systems Group,Hamburg,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hamburg University of Technology,Massively Parallel Systems Group,Hamburg,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9366,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.78758972,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.6033999919891357,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.6033999919891357,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.5648000240325928,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.4968999922275543,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.7171492576599121},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7125649452209473},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.4707793891429901},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.43402600288391113},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3636157512664795},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.20125749707221985}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.7171492576599121},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7125649452209473},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.4707793891429901},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.43402600288391113},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3636157512664795},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.20125749707221985},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec62836.2024.10938416","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec62836.2024.10938416","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1979527452","https://openalex.org/W2110195531","https://openalex.org/W2129806488","https://openalex.org/W2155503253","https://openalex.org/W2163687928","https://openalex.org/W2164917554","https://openalex.org/W2913704315","https://openalex.org/W3018019719","https://openalex.org/W3102510044","https://openalex.org/W4308090436","https://openalex.org/W4312051897","https://openalex.org/W4400409880","https://openalex.org/W4401018292"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4238897586","https://openalex.org/W435179959","https://openalex.org/W2619091065","https://openalex.org/W2059640416","https://openalex.org/W1490753184","https://openalex.org/W2023832055","https://openalex.org/W2082485924"],"abstract_inverted_index":{"Graphics":[0],"processing":[1],"units":[2],"(GPUs)":[3],"have":[4],"become":[5],"essential":[6],"accelerators":[7],"in":[8,42,225,264],"the":[9,35,56,64,98,107,114,126,148,177,181,234,237,250,255],"fields":[10],"of":[11,34,66,101,125,147,150,180,236,258],"artificial":[12],"intelligence":[13],"(AI),":[14],"high":[15],"performance":[16,24,99],"computing":[17,28],"(HPC),":[18],"and":[19,72,137,240,261,268],"data":[20,174,196],"analytics,":[21],"offering":[22],"substantial":[23],"improvements":[25],"over":[26],"traditional":[27],"resources.":[29],"In":[30],"2022,":[31],"NVIDIA's":[32],"release":[33],"Hopper":[36,238],"architecture":[37,239],"marked":[38],"a":[39,47,79,144,169,208],"significant":[40,163],"advancement":[41],"GPU":[43],"design":[44],"by":[45,119,201,211],"adding":[46],"new":[48,103,242],"hierarchical":[49],"level":[50],"to":[51,89,162,188,207,253],"their":[52],"CUDA":[53],"programming":[54],"model:":[55],"thread":[57,67,171,183],"block":[58,172,184],"cluster":[59,221],"(TBC).":[60],"This":[61,94],"feature":[62,159],"enables":[63],"grouping":[65],"blocks,":[68],"facilitating":[69],"direct":[70,115],"communication":[71,116],"synchronization":[73],"between":[74],"them.":[75],"To":[76],"support":[77],"this,":[78],"dedicated":[80],"SM-to-SM":[81,127],"network":[82,128],"was":[83],"integrated,":[84],"connecting":[85],"streaming":[86],"multiprocessors":[87],"(SMs)":[88],"facilitate":[90],"efficient":[91],"inter-block":[92],"communication.":[93],"paper":[95,246],"delves":[96],"into":[97,176],"characteristics":[100],"this":[102,158,245],"feature,":[104,244],"specifically":[105],"examining":[106],"latencies":[108],"developers":[109,248],"can":[110,160,185,204,223],"anticipate":[111],"when":[112],"utilizing":[113,219],"channel":[117],"provided":[118],"TBCs.":[120,213],"We":[121],"present":[122],"an":[123,226],"analysis":[124],"behavior,":[129],"which":[130],"is":[131],"crucial":[132],"for":[133,195],"developing":[134,265],"accurate":[135,266],"analytical":[136,267],"cycle-accurate":[138,269],"simulation":[139,270],"models.":[140,271],"Our":[141,214],"study":[142],"includes":[143],"comprehensive":[145],"evaluation":[146],"impact":[149],"TBCs":[151],"on":[152],"application":[153],"performance,":[154],"highlighting":[155],"scenarios":[156],"where":[157,168],"lead":[161],"improvements.":[164],"For":[165],"instance,":[166],"applications":[167,199],"data-producing":[170],"writes":[173],"directly":[175],"shared":[178,202],"memory":[179,194,203],"consuming":[182],"be":[186],"up":[187,206],"2.3x":[189],"faster":[190],"than":[191],"using":[192],"global":[193],"transfer.":[197],"Additionally,":[198],"constrained":[200],"achieve":[205],"2.1x":[209],"speedup":[210],"employing":[212],"findings":[215],"also":[216],"reveal":[217],"that":[218],"large":[220],"dimensions":[222],"result":[224],"execution":[227],"time":[228],"overhead":[229],"exceeding":[230],"20%.":[231],"By":[232],"exploring":[233],"intricacies":[235],"its":[241],"TBC":[243],"equips":[247],"with":[249],"knowledge":[251],"needed":[252],"harness":[254],"full":[256],"potential":[257],"modern":[259],"GPUs":[260],"assists":[262],"researchers":[263]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
