{"id":"https://openalex.org/W4385567949","doi":"https://doi.org/10.1145/3580305.3599573","title":"Training Large-scale Foundation Models on Emerging AI Chips","display_name":"Training Large-scale Foundation Models on Emerging AI Chips","publication_year":2023,"publication_date":"2023-08-04","ids":{"openalex":"https://openalex.org/W4385567949","doi":"https://doi.org/10.1145/3580305.3599573"},"language":"en","primary_location":{"id":"doi:10.1145/3580305.3599573","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3580305.3599573","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082273313","display_name":"Aashiq Muhamed","orcid":"https://orcid.org/0000-0002-8657-0439"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Aashiq Muhamed","raw_affiliation_strings":["AWS AI Labs, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs, Santa Clara, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047048637","display_name":"Christian Bock","orcid":"https://orcid.org/0000-0002-0701-5868"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Christian Bock","raw_affiliation_strings":["AWS AI Labs, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs, Munich, Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008777481","display_name":"Rahul Solanki","orcid":"https://orcid.org/0009-0004-8176-4594"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rahul Solanki","raw_affiliation_strings":["AWS Neuron, Cupertino, CA, USA"],"affiliations":[{"raw_affiliation_string":"AWS Neuron, Cupertino, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007175636","display_name":"Youngsuk Park","orcid":"https://orcid.org/0000-0002-0970-9214"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Youngsuk Park","raw_affiliation_strings":["AWS AI Labs, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs, Santa Clara, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101638214","display_name":"Yida Wang","orcid":"https://orcid.org/0000-0001-8165-840X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yida Wang","raw_affiliation_strings":["AWS AIRE, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"AWS AIRE, Santa Clara, CA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080409305","display_name":"Jun Huan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jun Huan","raw_affiliation_strings":["AWS AI Labs, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs, Santa Clara, CA, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5082273313"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6018,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.68982111,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"5821","last_page":"5822"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9889000058174133,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.7262470126152039},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6942003965377808},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.6061387658119202},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5285890102386475},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5260969400405884},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4657839834690094},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44251203536987305},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3257367014884949},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.2680337429046631},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.13320034742355347}],"concepts":[{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.7262470126152039},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6942003965377808},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.6061387658119202},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5285890102386475},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5260969400405884},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4657839834690094},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44251203536987305},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3257367014884949},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2680337429046631},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.13320034742355347},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3580305.3599573","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3580305.3599573","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W2884700152","https://openalex.org/W2963341956","https://openalex.org/W2969388332","https://openalex.org/W2981852735","https://openalex.org/W3037847693","https://openalex.org/W3129831491","https://openalex.org/W4229701516","https://openalex.org/W4288089799","https://openalex.org/W4312933868","https://openalex.org/W4312983671"],"related_works":["https://openalex.org/W4382323155","https://openalex.org/W4315697128","https://openalex.org/W3205506801","https://openalex.org/W2971502891","https://openalex.org/W3183570023","https://openalex.org/W4287067436","https://openalex.org/W4280599700","https://openalex.org/W2986126107","https://openalex.org/W4323650687","https://openalex.org/W2381393187"],"abstract_inverted_index":{"Foundation":[0],"models":[1,66,103,126,153],"such":[2,21,42],"as":[3,22,43,91],"ChatGPT":[4],"and":[5,14,29,97],"GPT-4":[6,102],"have":[7,67,71],"garnered":[8],"significant":[9],"interest":[10],"from":[11],"both":[12],"academia":[13],"industry":[15],"due":[16],"to":[17,106,164],"their":[18,72],"emergent":[19],"capabilities,":[20],"few-shot":[23],"prompting,":[24],"multi-step":[25],"reasoning,":[26],"instruction":[27],"following,":[28],"model":[30,87,94,136],"calibration.":[31],"Such":[32],"capabilities":[33,63],"were":[34],"previously":[35],"only":[36],"attainable":[37],"with":[38,58,143],"specially":[39],"designed":[40],"models,":[41],"those":[44],"using":[45],"knowledge":[46],"graphs,":[47],"but":[48],"can":[49,146,160],"now":[50],"be":[51],"achieved":[52],"on":[53,137,154],"a":[54,75,92,134,138,155],"much":[55,77],"larger":[56],"scale":[57],"foundation":[59,65,125],"models.":[60],"As":[61],"the":[62,84,100,166],"of":[64,113,116,124],"increased,":[68],"so":[69],"too":[70],"sizes":[73],"at":[74],"rate":[76],"faster":[78],"than":[79],"Moore's":[80],"law.":[81],"For":[82,131],"example,":[83],"BERT":[85,135],"large":[86,156],"was":[88],"initially":[89],"released":[90],"334M":[93],"in":[95,118],"2018,":[96],"by":[98],"2023,":[99],"largest":[101],"are":[104],"estimated":[105,167],"range":[107],"between":[108],"200-300B,":[109],"representing":[110],"an":[111],"increase":[112],"three":[114],"orders":[115],"magnitude":[117],"just":[119],"five":[120],"years.":[121],"The":[122],"training":[123,133,151],"requires":[127],"massive":[128],"computing":[129],"power.":[130],"instance,":[132],"single":[139],"state-of-the-art":[140],"GPU":[141,158],"machine":[142],"multi-A100":[144],"chips":[145],"take":[147,161],"several":[148,162],"days,":[149],"while":[150],"GPT-3":[152],"multi-instance":[157],"cluster":[159],"months":[163],"complete":[165],"3":[168],"X":[169],"1023":[170],"flops.":[171]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
