{"id":"https://openalex.org/W4396653841","doi":"https://doi.org/10.48550/arxiv.2405.01481","title":"NeMo-Aligner: Scalable Toolkit for Efficient Model Alignment","display_name":"NeMo-Aligner: Scalable Toolkit for Efficient Model Alignment","publication_year":2024,"publication_date":"2024-05-02","ids":{"openalex":"https://openalex.org/W4396653841","doi":"https://doi.org/10.48550/arxiv.2405.01481"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2405.01481","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.01481","pdf_url":"https://arxiv.org/pdf/2405.01481","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2405.01481","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111188854","display_name":"Gerald Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Shen, Gerald","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113164702","display_name":"Zhilin Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhilin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065423636","display_name":"Olivier Delalleau","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Delalleau, Olivier","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055726090","display_name":"Jiaqi Zeng","orcid":"https://orcid.org/0000-0002-6967-5394"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeng, Jiaqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101373873","display_name":"Yi Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Yi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093285949","display_name":"Daniel Egert","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Egert, Daniel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101184337","display_name":"Shengyang Sun","orcid":"https://orcid.org/0009-0001-7727-1904"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Shengyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101069767","display_name":"Jimmy Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jimmy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101319375","display_name":"Sahil Jain","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jain, Sahil","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089955412","display_name":"Ali Taghibakhshi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Taghibakhshi, Ali","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039146131","display_name":"Markel Sanz Ausin","orcid":"https://orcid.org/0000-0002-4526-9252"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ausin, Markel Sanz","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001012548","display_name":"Ashwath Aithal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aithal, Ashwath","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5048403564","display_name":"Oleksii Kuchaiev","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kuchaiev, Oleksii","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5111188854"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.9151999950408936,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.9151999950408936,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13650","display_name":"Computational Physics and Python Applications","score":0.9147999882698059,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6967711448669434},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6660459041595459},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.33678144216537476},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.32403379678726196},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.32246243953704834},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.17937466502189636}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6967711448669434},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6660459041595459},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.33678144216537476},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.32403379678726196},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.32246243953704834},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.17937466502189636}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2405.01481","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.01481","pdf_url":"https://arxiv.org/pdf/2405.01481","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2405.01481","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2405.01481","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2405.01481","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.01481","pdf_url":"https://arxiv.org/pdf/2405.01481","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4396653841.pdf","grobid_xml":"https://content.openalex.org/works/W4396653841.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W1982914007","https://openalex.org/W2159583675","https://openalex.org/W1824242903","https://openalex.org/W1493858311","https://openalex.org/W2155470929","https://openalex.org/W2394465510","https://openalex.org/W2111125783"],"abstract_inverted_index":{"Aligning":[0],"Large":[1],"Language":[2],"Models":[3],"(LLMs)":[4],"with":[5,79,136,142],"human":[6],"values":[7],"and":[8,16,32,73,82,104,146],"preferences":[9],"is":[10,126,140],"essential":[11],"for":[12,29,51,62,85,128,132],"making":[13],"them":[14],"helpful":[15],"safe.":[17],"However,":[18],"building":[19],"efficient":[20],"tools":[21],"to":[22,58],"perform":[23],"alignment":[24,53,90,116,134],"can":[25,55],"be":[26],"challenging,":[27],"especially":[28],"the":[30,64,115],"largest":[31,65],"most":[33,113],"competent":[34],"LLMs":[35,67],"which":[36],"often":[37],"contain":[38],"tens":[39],"or":[40],"hundreds":[41],"of":[42,44,88,114],"billions":[43],"parameters.":[45],"We":[46],"create":[47],"NeMo-Aligner,":[48],"a":[49,59,119],"toolkit":[50,110],"model":[52,89],"that":[54],"efficiently":[56],"scale":[57],"thousand":[60],"GPUs":[61],"training":[63],"open-source":[66],"such":[68,91],"as":[69],"Nemotron":[70],"4":[71],"340B":[72],"Llama":[74],"3.1":[75],"405B.":[76],"NeMo-Aligner":[77,125],"comes":[78],"highly":[80],"optimized":[81],"scalable":[83],"implementations":[84],"major":[86],"paradigms":[87],"as:":[92],"Reinforcement":[93],"Learning":[94],"from":[95],"Human":[96],"Feedback":[97],"(RLHF),":[98],"Direct":[99],"Preference":[100],"Optimization":[101],"(DPO),":[102],"SteerLM,":[103],"Self-Play":[105],"Fine-Tuning":[106,122],"(SPIN).":[107],"Additionally,":[108],"our":[109],"supports":[111],"running":[112],"techniques":[117,135],"in":[118],"Parameter":[120],"Efficient":[121],"(PEFT)":[123],"setting.":[124],"designed":[127],"extensibility,":[129],"allowing":[130],"support":[131],"other":[133],"minimal":[137],"effort.":[138],"It":[139],"open-sourced":[141],"Apache":[143],"2.0":[144],"License":[145],"we":[147],"invite":[148],"community":[149],"contributions":[150],"at":[151],"https://github.com/NVIDIA/NeMo-Aligner":[152]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
