{"id":"https://openalex.org/W4415250872","doi":"https://doi.org/10.1109/hpec67600.2025.11196413","title":"Accelerating Supercomputing: AI-Hardware-Driven Innovation for Speed and Efficiency","display_name":"Accelerating Supercomputing: AI-Hardware-Driven Innovation for Speed and Efficiency","publication_year":2025,"publication_date":"2025-09-15","ids":{"openalex":"https://openalex.org/W4415250872","doi":"https://doi.org/10.1109/hpec67600.2025.11196413"},"language":"en","primary_location":{"id":"doi:10.1109/hpec67600.2025.11196413","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196413","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075517045","display_name":"Jack Dongarra","orcid":"https://orcid.org/0000-0003-3247-1782"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jack Dongarra","raw_affiliation_strings":["University of Tennessee Oak Ridge National Laboratory University of Manchester,Oak Ridge,TN,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Tennessee Oak Ridge National Laboratory University of Manchester,Oak Ridge,TN,USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062279572","display_name":"John A. Gunnels","orcid":"https://orcid.org/0000-0001-5110-190X"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John Gunnels","raw_affiliation_strings":["NVIDIA Corporation,Santa Clara,CA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation,Santa Clara,CA,USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028957326","display_name":"Harun Bayraktar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Harun Bayraktar","raw_affiliation_strings":["NVIDIA Corporation,Santa Clara,CA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation,Santa Clara,CA,USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101964224","display_name":"Azzam Haidar","orcid":"https://orcid.org/0000-0002-3177-2084"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Azzam Haidar","raw_affiliation_strings":["NVIDIA Corporation,Santa Clara,CA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation,Santa Clara,CA,USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113999461","display_name":"Dan Ernst","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dan Ernst","raw_affiliation_strings":["NVIDIA Corporation,Santa Clara,CA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation,Santa Clara,CA,USA","institution_ids":["https://openalex.org/I4210127875"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28113879,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9277999997138977,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9277999997138977,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5828999876976013},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.44440001249313354},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.42080000042915344},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.38589999079704285},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.3758000135421753},{"id":"https://openalex.org/keywords/applications-of-artificial-intelligence","display_name":"Applications of artificial intelligence","score":0.3303999900817871},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.329800009727478}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6818000078201294},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5828999876976013},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.44440001249313354},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.42080000042915344},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.38589999079704285},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.3758000135421753},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3596999943256378},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.35249999165534973},{"id":"https://openalex.org/C157170001","wikidata":"https://www.wikidata.org/wiki/Q4781507","display_name":"Applications of artificial intelligence","level":2,"score":0.3303999900817871},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.329800009727478},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3086000084877014},{"id":"https://openalex.org/C159149176","wikidata":"https://www.wikidata.org/wiki/Q14489129","display_name":"Evolutionary algorithm","level":2,"score":0.3086000084877014},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.30070000886917114},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.298799991607666},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2976999878883362},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.29170000553131104},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.2903999984264374},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.28290000557899475},{"id":"https://openalex.org/C13736549","wikidata":"https://www.wikidata.org/wiki/Q4489420","display_name":"Industrial engineering","level":1,"score":0.273499995470047},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.26750001311302185},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.25600001215934753},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.25270000100135803}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec67600.2025.11196413","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec67600.2025.11196413","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1977146902","https://openalex.org/W1984222112","https://openalex.org/W2000636639","https://openalex.org/W2022792550","https://openalex.org/W2032309817","https://openalex.org/W2066103119","https://openalex.org/W2120432001","https://openalex.org/W2165384099","https://openalex.org/W2169631286","https://openalex.org/W2170901118","https://openalex.org/W2606722458","https://openalex.org/W2769837669","https://openalex.org/W2895305554","https://openalex.org/W2919115771","https://openalex.org/W2981980078","https://openalex.org/W3108141724","https://openalex.org/W3138530731","https://openalex.org/W3211043030","https://openalex.org/W3217045543","https://openalex.org/W4221160294","https://openalex.org/W4234662483","https://openalex.org/W4250981202","https://openalex.org/W4300273322","https://openalex.org/W4400681470","https://openalex.org/W4405754839","https://openalex.org/W4405755284","https://openalex.org/W4405756116"],"related_works":[],"abstract_inverted_index":{"The":[0],"evolution":[1],"of":[2,132],"GPUs":[3],"has":[4],"resulted":[5],"in":[6,82,92],"democratized":[7],"access":[8],"to":[9,78],"increasingly":[10],"powerful":[11],"low-precision":[12],"compute":[13],"capabilities,":[14],"designed":[15],"for":[16,35],"artificial":[17],"intelligence":[18],"(AI),":[19],"particularly":[20],"large":[21],"language":[22],"models":[23],"(LLMs)":[24],"and":[25,52,60,84,99,123,140],"generative":[26],"AI.":[27],"These":[28],"algorithms":[29,59],"heavily":[30],"utilize":[31],"hardware":[32],"units":[33],"specialized":[34],"matrix":[36],"multiplication,":[37],"such":[38,94,126],"as":[39,95,127],"Tensor":[40,138],"Cores,":[41,139],"that":[42],"have":[43,66],"advanced":[44],"since":[45],"their":[46,103,145],"introduction,":[47],"offering":[48],"improved":[49],"functionality,":[50],"throughput,":[51],"energy":[53],"efficiency.":[54,86],"Two":[55],"key":[56],"techniques:":[57],"mixed-precision":[58,97],"floating-point":[61],"emulation,":[62],"leveraging":[63],"these":[64,88,116,148],"resources,":[65],"emerged.":[67],"They":[68],"enable":[69],"scientific":[70],"applications,":[71],"many":[72],"dependent":[73],"upon":[74],"high-precision":[75],"linear":[76],"algebra,":[77],"achieve":[79],"dramatic":[80],"gains":[81],"performance":[83],"power":[85],"Additionally,":[87],"methods":[89],"facilitate":[90],"innovation":[91],"areas":[93],"fine-grained":[96],"strategies":[98],"data":[100],"compression,":[101],"broadening":[102],"impact":[104],"across":[105],"diverse":[106],"computing":[107],"platforms.":[108],"This":[109],"paper":[110],"explores":[111],"the":[112,128,133],"opportunities":[113],"afforded":[114],"by":[115],"developments.":[117],"We":[118],"highlight":[119],"both":[120],"evolutionary":[121],"advances":[122],"revolutionary":[124],"features,":[125],"enhanced":[129],"scaling":[130],"capabilities":[131],"latest":[134],"NVIDIA":[135],"Blackwell":[136],"architecture\u2019s":[137],"present":[141],"empirical":[142],"results,":[143],"demonstrating":[144],"effectiveness":[146],"on":[147],"GPUs.":[149]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-16T00:00:00"}
