{"id":"https://openalex.org/W4406459231","doi":"https://doi.org/10.1109/bigdata62323.2024.10825098","title":"HLAT: High-quality Large Language Model Pre-trained on AWS Trainium","display_name":"HLAT: High-quality Large Language Model Pre-trained on AWS Trainium","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406459231","doi":"https://doi.org/10.1109/bigdata62323.2024.10825098"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825098","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825098","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053817404","display_name":"Haozheng Fan","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Haozheng Fan","raw_affiliation_strings":["Amazon,Amazon Web Services"],"affiliations":[{"raw_affiliation_string":"Amazon,Amazon Web Services","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101923484","display_name":"Hao Zhou","orcid":"https://orcid.org/0009-0009-8675-8253"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hao Zhou","raw_affiliation_strings":["Amazon,AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"Amazon,AWS AI Labs","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030894786","display_name":"Guangtai Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guangtai Huang","raw_affiliation_strings":["Amazon,Amazon Web Services"],"affiliations":[{"raw_affiliation_string":"Amazon,Amazon Web Services","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010360658","display_name":"Parameswaran Raman","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Parameswaran Raman","raw_affiliation_strings":["Amazon,Amazon Web Services"],"affiliations":[{"raw_affiliation_string":"Amazon,Amazon Web Services","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020914269","display_name":"Xinwei Fu","orcid":"https://orcid.org/0009-0004-7822-5450"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xinwei Fu","raw_affiliation_strings":["Amazon,Amazon Web Services"],"affiliations":[{"raw_affiliation_string":"Amazon,Amazon Web Services","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081778658","display_name":"Gaurav Gupta","orcid":"https://orcid.org/0000-0001-7763-3090"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gaurav Gupta","raw_affiliation_strings":["Amazon,AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"Amazon,AWS AI Labs","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109706322","display_name":"Dhananjay Ram","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Dhananjay Ram","raw_affiliation_strings":["Amazon,AGI Foundations"],"affiliations":[{"raw_affiliation_string":"Amazon,AGI Foundations","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101638214","display_name":"Yida Wang","orcid":"https://orcid.org/0000-0001-8165-840X"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yida Wang","raw_affiliation_strings":["Amazon,Amazon Web Services"],"affiliations":[{"raw_affiliation_string":"Amazon,Amazon Web Services","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080409305","display_name":"Jun Huan","orcid":"https://orcid.org/0000-0002-7020-1604"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jun Huan","raw_affiliation_strings":["Amazon,AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"Amazon,AWS AI Labs","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5053817404"],"corresponding_institution_ids":["https://openalex.org/I1311688040"],"apc_list":null,"apc_paid":null,"fwci":0.3626,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.70788843,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"2100","last_page":"2109"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.988099992275238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7212991714477539},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.6005576252937317},{"id":"https://openalex.org/keywords/reliability-engineering","display_name":"Reliability engineering","score":0.3356861472129822},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3231220543384552},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10259628295898438}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7212991714477539},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.6005576252937317},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.3356861472129822},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3231220543384552},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10259628295898438},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825098","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825098","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.800000011920929,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":82,"referenced_works":["https://openalex.org/W2338908902","https://openalex.org/W2763421725","https://openalex.org/W2794325560","https://openalex.org/W2896457183","https://openalex.org/W2912924812","https://openalex.org/W2946609015","https://openalex.org/W2953271402","https://openalex.org/W2963173418","https://openalex.org/W2963339397","https://openalex.org/W2963518342","https://openalex.org/W2965373594","https://openalex.org/W2973727699","https://openalex.org/W2990138404","https://openalex.org/W2998617917","https://openalex.org/W3034999214","https://openalex.org/W3081168214","https://openalex.org/W3105882417","https://openalex.org/W3129831491","https://openalex.org/W3177813494","https://openalex.org/W3187134297","https://openalex.org/W3194676777","https://openalex.org/W3201174429","https://openalex.org/W4225591000","https://openalex.org/W4286892945","https://openalex.org/W4301581299","https://openalex.org/W4321472057","https://openalex.org/W4322718191","https://openalex.org/W4362515116","https://openalex.org/W4376167329","https://openalex.org/W4379468930","https://openalex.org/W4381586770","https://openalex.org/W4384918448","https://openalex.org/W4385245566","https://openalex.org/W4385889719","https://openalex.org/W4386185625","https://openalex.org/W4386501849","https://openalex.org/W4386768656","https://openalex.org/W4387561465","https://openalex.org/W4389157038","https://openalex.org/W4389761026","https://openalex.org/W4390041933","https://openalex.org/W4390437677","https://openalex.org/W4391136507","https://openalex.org/W4402671236","https://openalex.org/W4404386171","https://openalex.org/W4404573785","https://openalex.org/W6638783484","https://openalex.org/W6703652217","https://openalex.org/W6745245109","https://openalex.org/W6749107692","https://openalex.org/W6749838110","https://openalex.org/W6755207826","https://openalex.org/W6757817989","https://openalex.org/W6764401283","https://openalex.org/W6766673545","https://openalex.org/W6767997687","https://openalex.org/W6773820404","https://openalex.org/W6778883912","https://openalex.org/W6781254577","https://openalex.org/W6782879696","https://openalex.org/W6798182279","https://openalex.org/W6801617135","https://openalex.org/W6803096969","https://openalex.org/W6810220367","https://openalex.org/W6838632916","https://openalex.org/W6850182213","https://openalex.org/W6850625674","https://openalex.org/W6851775633","https://openalex.org/W6852584927","https://openalex.org/W6852800892","https://openalex.org/W6853517571","https://openalex.org/W6854692045","https://openalex.org/W6854866820","https://openalex.org/W6855970221","https://openalex.org/W6856223801","https://openalex.org/W6856289622","https://openalex.org/W6858077893","https://openalex.org/W6858453470","https://openalex.org/W6859664255","https://openalex.org/W6860041859","https://openalex.org/W6860218782","https://openalex.org/W6893640197"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Getting":[0],"large":[1,22,84],"language":[2],"models":[3,141,226],"(LLMs)":[4],"to":[5,30,36,48,101,221],"perform":[6],"well":[7],"on":[8,95,150,168],"the":[9,38,52,61,64,75,171,181,192,197],"downstream":[10],"tasks":[11],"requires":[12],"pre-training":[13],"over":[14,127],"trillions":[15],"of":[16,24,43,51,93,115,133,173,186,195],"tokens.":[17,130],"This":[18],"typically":[19],"demands":[20],"a":[21,31,49,113,202],"number":[23,42],"powerful":[25],"computational":[26],"devices":[27],"in":[28],"addition":[29],"stable":[32],"distributed":[33,204],"training":[34,83,89,182,205],"framework":[35],"accelerate":[37],"training.":[39],"The":[40,131],"growing":[41],"applications":[44],"leveraging":[45],"AI/ML":[46],"led":[47],"scarcity":[50],"expensive":[53],"conventional":[54],"accelerators":[55,126],"(such":[56],"as":[57],"GPUs),":[58],"which":[59,146],"emphasizes":[60],"need":[62],"for":[63,82,207],"alternative":[65],"specialized-accelerators":[66],"that":[67,163,213],"are":[68],"scalable":[69],"and":[70,117,144,153,184,190,230],"cost-efficient.":[71],"AWS":[72,96,124,208,214],"Trainium":[73,97,125,215],"is":[74,98,135,219],"second-generation":[76],"machine":[77],"learning":[78,86],"accelerator":[79],"purposely":[80],"built":[81],"deep":[85],"models.":[87],"However,":[88],"LLMs":[90,120],"with":[91,170,227],"billions":[92],"parameters":[94],"challenging":[99],"due":[100],"its":[102],"relatively":[103],"nascent":[104],"software":[105],"ecosystem.":[106],"In":[107],"this":[108],"paper,":[109],"we":[110,161],"showcase":[111],"HLAT:":[112],"family":[114],"7B":[116],"70B":[118],"decoder-only":[119],"pre-trained":[121],"using":[122,196],"4096":[123],"1.8":[128],"trillion":[129],"performance":[132,229],"HLAT":[134,164],"benchmarked":[136],"against":[137],"popular":[138],"open":[139],"source":[140],"including":[142],"LLaMA":[143],"OpenLLaMA,":[145],"have":[147],"been":[148],"trained":[149],"NVIDIA":[151],"GPUs":[152],"Google":[154],"TPUs,":[155],"respectively.":[156],"On":[157],"various":[158],"evaluation":[159],"tasks,":[160],"show":[162],"achieves":[165],"model":[166,175],"quality":[167],"par":[169],"baselines":[172],"similar":[174],"size.":[176],"We":[177],"also":[178],"open-source":[179],"all":[180],"scripts":[183],"configurations":[185],"HLAT<sup":[187],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[188],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[189],"share":[191],"best":[193],"practice":[194],"NeuronX":[198],"Distributed":[199],"Training":[200],"(NxDT),":[201],"customized":[203],"library":[206],"Trainium.":[209],"Our":[210],"work":[211],"demonstrates":[212],"powered":[216],"by":[217],"NxDT":[218],"able":[220],"successfully":[222],"pre-train":[223],"state-of-the-art":[224],"LLM":[225],"high":[228],"cost-effectiveness.":[231]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
