{"id":"https://openalex.org/W4396814955","doi":"https://doi.org/10.23919/isc.2024.10528939","title":"Optimizing Distributed Training on Frontier for Large Language Models","display_name":"Optimizing Distributed Training on Frontier for Large Language Models","publication_year":2024,"publication_date":"2024-05-01","ids":{"openalex":"https://openalex.org/W4396814955","doi":"https://doi.org/10.23919/isc.2024.10528939"},"language":"en","primary_location":{"id":"doi:10.23919/isc.2024.10528939","is_oa":true,"landing_page_url":"https://doi.org/10.23919/isc.2024.10528939","pdf_url":"https://ieeexplore.ieee.org/ielx7/10528919/10528920/10528939.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ISC High Performance 2024 Research Paper Proceedings (39th International Conference)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/10528919/10528920/10528939.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040970890","display_name":"Sajal Dash","orcid":"https://orcid.org/0000-0001-5308-914X"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sajal Dash","raw_affiliation_strings":["Oak Ridge National Laboratory"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004692421","display_name":"Isaac Lyngaas","orcid":"https://orcid.org/0000-0002-1682-4309"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Isaac R Lyngaas","raw_affiliation_strings":["Oak Ridge National Laboratory"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051882926","display_name":"Junqi Yin","orcid":"https://orcid.org/0000-0003-3843-5520"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Junqi Yin","raw_affiliation_strings":["Oak Ridge National Laboratory"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100411431","display_name":"Xiao Wang","orcid":"https://orcid.org/0000-0001-6545-1943"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiao Wang","raw_affiliation_strings":["Oak Ridge National Laboratory"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006450724","display_name":"Romain \u00c9gel\u00e9","orcid":"https://orcid.org/0000-0002-8992-8192"},"institutions":[{"id":"https://openalex.org/I277688954","display_name":"Universit\u00e9 Paris-Saclay","ror":"https://ror.org/03xjwb503","country_code":"FR","type":"education","lineage":["https://openalex.org/I277688954"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Romain Egele","raw_affiliation_strings":["Universit\u00e9 Paris-Saclay"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Paris-Saclay","institution_ids":["https://openalex.org/I277688954"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101581249","display_name":"J. Austin Ellis","orcid":"https://orcid.org/0009-0005-7161-1540"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]},{"id":"https://openalex.org/I277688954","display_name":"Universit\u00e9 Paris-Saclay","ror":"https://ror.org/03xjwb503","country_code":"FR","type":"education","lineage":["https://openalex.org/I277688954"]}],"countries":["FR","US"],"is_corresponding":false,"raw_author_name":"J. Austin Ellis","raw_affiliation_strings":["AMD","Oak Ridge National Laboratory","Universit\u00e9 Paris-Saclay"],"affiliations":[{"raw_affiliation_string":"AMD","institution_ids":[]},{"raw_affiliation_string":"Oak Ridge National Laboratory","institution_ids":["https://openalex.org/I1289243028"]},{"raw_affiliation_string":"Universit\u00e9 Paris-Saclay","institution_ids":["https://openalex.org/I277688954"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029414305","display_name":"Matthias Maiterth","orcid":"https://orcid.org/0000-0001-8698-460X"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matthias Maiterth","raw_affiliation_strings":["Oak Ridge National Laboratory"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047240801","display_name":"Guojing Cong","orcid":null},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guojing Cong","raw_affiliation_strings":["Oak Ridge National Laboratory"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101916963","display_name":"Feiyi Wang","orcid":"https://orcid.org/0000-0002-0099-1559"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Feiyi Wang","raw_affiliation_strings":["Oak Ridge National Laboratory"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068721920","display_name":"Prasanna Balaprakash","orcid":"https://orcid.org/0000-0002-0292-5715"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Prasanna Balaprakash","raw_affiliation_strings":["Oak Ridge National Laboratory"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory","institution_ids":["https://openalex.org/I1289243028"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5040970890"],"corresponding_institution_ids":["https://openalex.org/I1289243028"],"apc_list":null,"apc_paid":null,"fwci":4.726,"has_fulltext":true,"cited_by_count":14,"citation_normalized_percentile":{"value":0.95372725,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9779000282287598,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7616797685623169},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.6506116390228271},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.607612669467926},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.5433609485626221},{"id":"https://openalex.org/keywords/frontier","display_name":"Frontier","score":0.467395156621933},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.4456435739994049},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.41708874702453613},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.41570496559143066}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7616797685623169},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.6506116390228271},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.607612669467926},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.5433609485626221},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.467395156621933},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.4456435739994049},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.41708874702453613},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.41570496559143066},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.23919/isc.2024.10528939","is_oa":true,"landing_page_url":"https://doi.org/10.23919/isc.2024.10528939","pdf_url":"https://ieeexplore.ieee.org/ielx7/10528919/10528920/10528939.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ISC High Performance 2024 Research Paper Proceedings (39th International Conference)","raw_type":"proceedings-article"},{"id":"pmh:oai:osti.gov:2438819","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/2438819","pdf_url":"https://www.osti.gov/servlets/purl/2438819","source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"doi:10.23919/isc.2024.10528939","is_oa":true,"landing_page_url":"https://doi.org/10.23919/isc.2024.10528939","pdf_url":"https://ieeexplore.ieee.org/ielx7/10528919/10528920/10528939.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ISC High Performance 2024 Research Paper Proceedings (39th International Conference)","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1489425746","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G1645119126","display_name":null,"funder_award_id":"AC05-00OR22725","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G1719536385","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G2296932962","display_name":null,"funder_award_id":"DE-AC05-00OR227","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G3299391273","display_name":null,"funder_award_id":"E-AC05-00OR22725","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G4423657506","display_name":null,"funder_award_id":"AC05-00OR22725","funder_id":"https://openalex.org/F4320338287","funder_display_name":"Oak Ridge National Laboratory"},{"id":"https://openalex.org/G4694431782","display_name":null,"funder_award_id":"DE-AC05-00OR2272","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G6488227018","display_name":null,"funder_award_id":"DE-AC05-00OR227","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G691578896","display_name":null,"funder_award_id":"DE-AC05-00OR2272","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G7114708214","display_name":null,"funder_award_id":"DE-AC05-00OR2272","funder_id":"https://openalex.org/F4320338287","funder_display_name":"Oak Ridge National Laboratory"},{"id":"https://openalex.org/G7995982022","display_name":null,"funder_award_id":"DE-AC05","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G8269158468","display_name":null,"funder_award_id":"AC05-00OR22725","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G8414908677","display_name":null,"funder_award_id":"DE-AC0","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G8423070353","display_name":null,"funder_award_id":"DE-AC05-000R22725","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G8799952057","display_name":null,"funder_award_id":"DE-AC05-00OR22","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G8906985441","display_name":null,"funder_award_id":"00OR22725","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320332359","display_name":"Office of Science","ror":"https://ror.org/00mmn6b08"},{"id":"https://openalex.org/F4320338287","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4396814955.pdf"},"referenced_works_count":34,"referenced_works":["https://openalex.org/W2612690371","https://openalex.org/W2896457183","https://openalex.org/W2913525628","https://openalex.org/W2969388332","https://openalex.org/W2991040477","https://openalex.org/W3001279689","https://openalex.org/W3129831491","https://openalex.org/W3132349482","https://openalex.org/W3204998121","https://openalex.org/W3206832494","https://openalex.org/W3209285784","https://openalex.org/W4221167110","https://openalex.org/W4225591000","https://openalex.org/W4229005866","https://openalex.org/W4281758439","https://openalex.org/W4292779060","https://openalex.org/W4308760226","https://openalex.org/W4311642023","https://openalex.org/W4380885974","https://openalex.org/W4384648639","https://openalex.org/W4385245566","https://openalex.org/W4386768656","https://openalex.org/W4387030238","https://openalex.org/W4388697414","https://openalex.org/W4404573785","https://openalex.org/W6737947904","https://openalex.org/W6755207826","https://openalex.org/W6772383348","https://openalex.org/W6778883912","https://openalex.org/W6810220367","https://openalex.org/W6810296985","https://openalex.org/W6811340617","https://openalex.org/W6838322825","https://openalex.org/W6854308872"],"related_works":["https://openalex.org/W2950520577","https://openalex.org/W1554644772","https://openalex.org/W2003935582","https://openalex.org/W2494130044","https://openalex.org/W3170887803","https://openalex.org/W74409296","https://openalex.org/W3209384898","https://openalex.org/W4400951174","https://openalex.org/W1595834484","https://openalex.org/W1991844655"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,240],"(LLMs)":[3],"have":[4,20,159],"demonstrated":[5,21],"remarkable":[6],"success":[7],"as":[8,97],"foundational":[9],"models,":[10],"benefiting":[11],"various":[12,89],"downstream":[13],"applications":[14],"through":[15,155,170],"fine-tuning.":[16],"Loss":[17],"scaling":[18,214,227],"studies":[19],"the":[22,80,138,196,199,205],"superior":[23],"performance":[24],"of":[25,38,167,189,198,229,243,246],"larger":[26],"LLMs":[27,35,166],"compared":[28],"to":[29,74,106,123,148,151],"their":[30,120,125],"smaller":[31],"counterparts.":[32],"Nevertheless,":[33],"training":[34,50,72,94,108,164,197,247],"with":[36],"billions":[37],"parameters":[39,122],"poses":[40],"significant":[41],"challenges":[42],"and":[43,87,91,102,119,132,144,173,181,192,204,218,231],"requires":[44,61],"considerable":[45],"computational":[46,134],"resources.":[47],"For":[48,176,195],"example,":[49],"a":[51,62,109,146],"one":[52],"trillion":[53,59],"parameter":[54,202,208],"GPT-style":[55],"model":[56,90,111,203],"on":[57,112,127,216],"20":[58],"tokens":[60],"staggering":[63],"120":[64],"million":[65],"exaflops.":[66],"This":[67],"research":[68],"explores":[69],"efficient":[70,161],"distributed":[71],"strategies":[73,162],"extract":[75],"this":[76],"computation":[77],"from":[78],"Frontier,":[79],"world's":[81],"first":[82],"exascale":[83],"supercomputer.":[84],"We":[85,114,136,158,223,237],"enable":[86],"investigate":[88],"data":[92,104],"parallel":[93],"techniques,":[95],"such":[96],"tensor":[98],"parallelism,":[99,101,105],"pipeline":[100],"sharded":[103],"facilitate":[107],"trillion-parameter":[110],"Frontier.":[113],"empirically":[115],"assess":[116],"these":[117,142,234,239],"techniques":[118,143],"associated":[121],"determine":[124],"impact":[126],"memory":[128],"footprint,":[129],"communication":[130],"latency,":[131],"GPU's":[133],"efficiency.":[135],"analyze":[137],"complex":[139],"interplay":[140],"among":[141],"find":[145],"strategy":[147],"combine":[149],"them":[150],"achieve":[152],"high":[153],"throughput":[154],"hyperparameter":[156,174],"tuning.":[157,175],"identified":[160],"for":[163,233],"large":[165],"varying":[168],"sizes":[169],"empirical":[171],"analysis":[172],"22":[177],"Billion,":[178,180],"175":[179,200],"1":[182,206],"Trillion":[183,207],"parameters,":[184],"we":[185,210],"achieved":[186,211,225],"GPU":[187],"throughputs":[188],"38.38%,":[190],"36.14%,":[191],"31.96%,":[193],"respectively.":[194,222],"Billion":[201],"model,":[209],"100%":[212],"weak":[213],"efficiency":[215],"1024":[217],"3072":[219],"Mi250X":[220],"GPUs,":[221],"also":[224],"strong":[226],"efficiencies":[228],"89%":[230],"87%":[232],"two":[235],"models.":[236],"trained":[238],"only":[241],"tens":[242],"iterations":[244],"instead":[245],"till":[248],"completion.":[249]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":5}],"updated_date":"2026-04-15T08:11:43.952461","created_date":"2025-10-10T00:00:00"}
