{"id":"https://openalex.org/W3187255235","doi":"https://doi.org/10.1145/3458817.3476209","title":"Efficient large-scale language model training on GPU clusters using megatron-LM","display_name":"Efficient large-scale language model training on GPU clusters using megatron-LM","publication_year":2021,"publication_date":"2021-10-21","ids":{"openalex":"https://openalex.org/W3187255235","doi":"https://doi.org/10.1145/3458817.3476209","mag":"3187255235"},"language":"en","primary_location":{"id":"doi:10.1145/3458817.3476209","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3458817.3476209","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2104.04473","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079514101","display_name":"Deepak Narayanan","orcid":"https://orcid.org/0000-0002-3020-2848"},"institutions":[{"id":"https://openalex.org/I4210124949","display_name":"Microsoft Research (India)","ror":"https://ror.org/02w7f3w92","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210124949"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Deepak Narayanan","raw_affiliation_strings":["NVIDIA and Microsoft Research"],"affiliations":[{"raw_affiliation_string":"NVIDIA and Microsoft Research","institution_ids":["https://openalex.org/I4210124949"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072436307","display_name":"Mohammad Shoeybi","orcid":null},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mohammad Shoeybi","raw_affiliation_strings":["NVIDIA","nVidia"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]},{"raw_affiliation_string":"nVidia","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010396318","display_name":"Jared Casper","orcid":null},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jared Casper","raw_affiliation_strings":["NVIDIA","nVidia"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]},{"raw_affiliation_string":"nVidia","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068437386","display_name":"Patrick LeGresley","orcid":null},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Patrick LeGresley","raw_affiliation_strings":["NVIDIA","nVidia"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]},{"raw_affiliation_string":"nVidia","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031170568","display_name":"Mostofa Patwary","orcid":null},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mostofa Patwary","raw_affiliation_strings":["NVIDIA","nVidia"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]},{"raw_affiliation_string":"nVidia","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085834120","display_name":"Vijay Anand Korthikanti","orcid":null},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Vijay Korthikanti","raw_affiliation_strings":["NVIDIA","nVidia"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]},{"raw_affiliation_string":"nVidia","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067437615","display_name":"Dmitri Vainbrand","orcid":null},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Dmitri Vainbrand","raw_affiliation_strings":["NVIDIA","nVidia"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]},{"raw_affiliation_string":"nVidia","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011548998","display_name":"Prethvi Kashinkunti","orcid":null},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Prethvi Kashinkunti","raw_affiliation_strings":["NVIDIA","nVidia"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]},{"raw_affiliation_string":"nVidia","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078833726","display_name":"Julie Bernauer","orcid":"https://orcid.org/0000-0003-2345-995X"},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Julie Bernauer","raw_affiliation_strings":["NVIDIA","nVidia"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]},{"raw_affiliation_string":"nVidia","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066242985","display_name":"Bryan Catanzaro","orcid":"https://orcid.org/0000-0003-0034-7728"},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Bryan Catanzaro","raw_affiliation_strings":["NVIDIA","nVidia"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]},{"raw_affiliation_string":"nVidia","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011102459","display_name":"Amar Phanishayee","orcid":"https://orcid.org/0009-0001-2777-1118"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Amar Phanishayee","raw_affiliation_strings":["Microsoft Research","Microsoft Research#TAB#"],"affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]},{"raw_affiliation_string":"Microsoft Research#TAB#","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005554337","display_name":"Matei Zaharia","orcid":"https://orcid.org/0000-0002-7547-7204"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matei Zaharia","raw_affiliation_strings":["Stanford University","Stanford University ()"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]},{"raw_affiliation_string":"Stanford University ()","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5079514101"],"corresponding_institution_ids":["https://openalex.org/I4210124949"],"apc_list":null,"apc_paid":null,"fwci":4.3389,"has_fulltext":true,"cited_by_count":38,"citation_normalized_percentile":{"value":0.95203827,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"15"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8843074440956116},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.7357447147369385},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7123957276344299},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.627802848815918},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6162058115005493},{"id":"https://openalex.org/keywords/gpu-cluster","display_name":"GPU cluster","score":0.5692209005355835},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.5553391575813293},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.4199027717113495},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.28238576650619507},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1357100009918213}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8843074440956116},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.7357447147369385},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7123957276344299},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.627802848815918},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6162058115005493},{"id":"https://openalex.org/C2781335571","wikidata":"https://www.wikidata.org/wiki/Q2633544","display_name":"GPU cluster","level":3,"score":0.5692209005355835},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.5553391575813293},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.4199027717113495},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.28238576650619507},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1357100009918213},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3458817.3476209","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3458817.3476209","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2104.04473","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.04473","pdf_url":"https://arxiv.org/pdf/2104.04473","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3187255235","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2104.04473.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2104.04473","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2104.04473","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2104.04473","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.04473","pdf_url":"https://arxiv.org/pdf/2104.04473","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.41999998688697815}],"awards":[{"id":"https://openalex.org/G1861310076","display_name":null,"funder_award_id":"CNS-1651570","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2499887605","display_name":null,"funder_award_id":"DGE-1656518","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G320138474","display_name":null,"funder_award_id":"-1656518","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5267932457","display_name":null,"funder_award_id":"1656518","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6671297155","display_name":null,"funder_award_id":"CAREER","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6894402473","display_name":null,"funder_award_id":"Fellowship","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W3187255235.pdf"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W1967134278","https://openalex.org/W2338908902","https://openalex.org/W2579247884","https://openalex.org/W2622263826","https://openalex.org/W2785452945","https://openalex.org/W2884700152","https://openalex.org/W2950813464","https://openalex.org/W2962747323","https://openalex.org/W2963341956","https://openalex.org/W2963351145","https://openalex.org/W2963403868","https://openalex.org/W2965373594","https://openalex.org/W2969388332","https://openalex.org/W2970971581","https://openalex.org/W2973727699","https://openalex.org/W2974008169","https://openalex.org/W2977720775","https://openalex.org/W2979245724","https://openalex.org/W2980268386","https://openalex.org/W2991040477","https://openalex.org/W3013576704","https://openalex.org/W3030163527","https://openalex.org/W3031276512","https://openalex.org/W3036879053","https://openalex.org/W3037585619","https://openalex.org/W3037847693","https://openalex.org/W3039050620","https://openalex.org/W3082274269","https://openalex.org/W3103395072","https://openalex.org/W3119866685","https://openalex.org/W3121562065","https://openalex.org/W3127806443","https://openalex.org/W3130395060","https://openalex.org/W3132107458","https://openalex.org/W3136891003","https://openalex.org/W3137550677","https://openalex.org/W3156643189","https://openalex.org/W3171483929","https://openalex.org/W6600297362","https://openalex.org/W6600466347","https://openalex.org/W6743955621","https://openalex.org/W6756379755"],"related_works":["https://openalex.org/W3204998121","https://openalex.org/W3143293307","https://openalex.org/W3110773422","https://openalex.org/W3168387563","https://openalex.org/W3156643189","https://openalex.org/W3101434632","https://openalex.org/W2109744350","https://openalex.org/W2741888411","https://openalex.org/W647925487","https://openalex.org/W2559794607","https://openalex.org/W3129831491","https://openalex.org/W3152631857","https://openalex.org/W2748614007","https://openalex.org/W3168682801","https://openalex.org/W2802840548","https://openalex.org/W3210803593","https://openalex.org/W3154478120","https://openalex.org/W3168584267","https://openalex.org/W1735605371","https://openalex.org/W2765909029"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,14,31],"have":[3,64],"led":[4],"to":[5,28,74,95,97,118,125],"state-of-the-art":[6],"accuracies":[7],"across":[8],"several":[9],"tasks.":[10],"However,":[11],"training":[12,50,127],"these":[13,71],"efficiently":[15],"is":[16,23],"challenging":[17],"because:":[18],"a)":[19],"GPU":[20],"memory":[21,115],"capacity":[22],"limited,":[24],"making":[25],"it":[26],"impossible":[27],"fit":[29],"large":[30],"on":[32,129,139],"even":[33],"a":[34,103,130],"multi-GPU":[35],"server,":[36],"and":[37,61,89],"b)":[38],"the":[39],"number":[40],"of":[41,55,70,79,99,144,146],"compute":[42],"operations":[43],"required":[44],"can":[45,92,109],"result":[46],"in":[47],"unrealistically":[48],"long":[49],"times.":[51],"Consequently,":[52],"new":[53],"methods":[54,72],"model":[56,131],"parallelism":[57,63,91],"such":[58],"as":[59],"tensor":[60],"pipeline":[62],"been":[65],"proposed.":[66],"Unfortunately,":[67],"naive":[68],"usage":[69],"leads":[73],"scaling":[75],"issues":[76],"at":[77,136],"thousands":[78,98],"GPUs.":[80,100],"In":[81],"this":[82],"paper,":[83],"we":[84],"show":[85],"how":[86],"tensor,":[87],"pipeline,":[88],"data":[90],"be":[93],"composed":[94],"scale":[96],"We":[101],"propose":[102],"novel":[104],"interleaved":[105],"pipelining":[106],"schedule":[107],"that":[108],"improve":[110],"throughput":[111,143],"by":[112],"10+%":[113],"with":[114,132],"footprint":[116],"comparable":[117],"existing":[119],"approaches.":[120],"Our":[121],"approach":[122],"allows":[123],"us":[124],"perform":[126],"iterations":[128],"1":[133],"trillion":[134],"parameters":[135],"502":[137],"petaFLOP/s":[138],"3072":[140],"GPUs":[141],"(per-GPU":[142],"52%":[145],"theoretical":[147],"peak).":[148]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":12},{"year":2023,"cited_by_count":13},{"year":2022,"cited_by_count":6}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
