{"id":"https://openalex.org/W6891981843","doi":"https://doi.org/10.48550/arxiv.2503.12964","title":"Training Video Foundation Models with NVIDIA NeMo","display_name":"Training Video Foundation Models with NVIDIA NeMo","publication_year":2025,"publication_date":"2025-03-17","ids":{"openalex":"https://openalex.org/W6891981843","doi":"https://doi.org/10.48550/arxiv.2503.12964"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2503.12964","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.12964","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2503.12964","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Patel, Zeeshan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Patel, Zeeshan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"He, Ethan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Ethan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Mannan, Parth","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mannan, Parth","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ren, Xiaowei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ren, Xiaowei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wolf, Ryan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wolf, Ryan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Agarwal, Niket","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Agarwal, Niket","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Huffman, Jacob","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huffman, Jacob","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wang, Zhuoyao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhuoyao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wang, Carl","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Carl","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Chang, Jack","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chang, Jack","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Bai, Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Yan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Huang, Tommy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Tommy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wang, Linnan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Linnan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Jain, Sahil","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jain, Sahil","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ramasamy, Shanmugam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ramasamy, Shanmugam","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Jennings, Joseph","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jennings, Joseph","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sirazitdinova, Ekaterina","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sirazitdinova, Ekaterina","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sudakov, Oleg","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sudakov, Oleg","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ma, Mingyuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Mingyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Chen, Bobby","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Bobby","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Lin, Forrest","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Forrest","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wang, Hao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Hao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sabavat, Vasanth Rao Naik","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sabavat, Vasanth Rao Naik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Niverty, Sriharsha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Niverty, Sriharsha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ou, Rong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ou, Rong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Bhattacharya, Pallab","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bhattacharya, Pallab","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Page, David","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Page, David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Tajbakhsh, Nima","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tajbakhsh, Nima","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Aithal, Ashwath","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aithal, Ashwath","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":29,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.47130000591278076,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.47130000591278076,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.09200000017881393,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.08540000021457672,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/nucleofection","display_name":"Nucleofection","score":0.4546000063419342},{"id":"https://openalex.org/keywords/gestational-period","display_name":"Gestational period","score":0.3808000087738037},{"id":"https://openalex.org/keywords/tsg101","display_name":"TSG101","score":0.34689998626708984},{"id":"https://openalex.org/keywords/dysgeusia","display_name":"Dysgeusia","score":0.3294999897480011},{"id":"https://openalex.org/keywords/diafiltration","display_name":"Diafiltration","score":0.3285999894142151},{"id":"https://openalex.org/keywords/liquation","display_name":"Liquation","score":0.30790001153945923},{"id":"https://openalex.org/keywords/triacetin","display_name":"Triacetin","score":0.3052000105381012},{"id":"https://openalex.org/keywords/proteogenomics","display_name":"Proteogenomics","score":0.3018999993801117},{"id":"https://openalex.org/keywords/fusible-alloy","display_name":"Fusible alloy","score":0.2971999943256378}],"concepts":[{"id":"https://openalex.org/C144251240","wikidata":"https://www.wikidata.org/wiki/Q7068229","display_name":"Nucleofection","level":4,"score":0.4546000063419342},{"id":"https://openalex.org/C2992336715","wikidata":"https://www.wikidata.org/wiki/Q63431143","display_name":"Gestational period","level":4,"score":0.3808000087738037},{"id":"https://openalex.org/C2778283623","wikidata":"https://www.wikidata.org/wiki/Q18032200","display_name":"TSG101","level":5,"score":0.34689998626708984},{"id":"https://openalex.org/C2777054765","wikidata":"https://www.wikidata.org/wiki/Q6402731","display_name":"Dysgeusia","level":3,"score":0.3294999897480011},{"id":"https://openalex.org/C18743360","wikidata":"https://www.wikidata.org/wiki/Q1208096","display_name":"Diafiltration","level":4,"score":0.3285999894142151},{"id":"https://openalex.org/C180938184","wikidata":"https://www.wikidata.org/wiki/Q2142270","display_name":"Liquation","level":3,"score":0.30790001153945923},{"id":"https://openalex.org/C2776781215","wikidata":"https://www.wikidata.org/wiki/Q83253","display_name":"Triacetin","level":2,"score":0.3052000105381012},{"id":"https://openalex.org/C145741570","wikidata":"https://www.wikidata.org/wiki/Q7251534","display_name":"Proteogenomics","level":5,"score":0.3018999993801117},{"id":"https://openalex.org/C133074676","wikidata":"https://www.wikidata.org/wiki/Q428729","display_name":"Fusible alloy","level":2,"score":0.2971999943256378},{"id":"https://openalex.org/C2777968768","wikidata":"https://www.wikidata.org/wiki/Q1280161","display_name":"Emperipolesis","level":4,"score":0.2969000041484833},{"id":"https://openalex.org/C2777158700","wikidata":"https://www.wikidata.org/wiki/Q1419356","display_name":"Hyporeflexia","level":3,"score":0.29109999537467957},{"id":"https://openalex.org/C104545631","wikidata":"https://www.wikidata.org/wiki/Q464858","display_name":"Demotion","level":3,"score":0.2888999879360199},{"id":"https://openalex.org/C2781032047","wikidata":"https://www.wikidata.org/wiki/Q938793","display_name":"Articular cartilage damage","level":5,"score":0.287200003862381},{"id":"https://openalex.org/C2777935831","wikidata":"https://www.wikidata.org/wiki/Q3144949","display_name":"Hemopericardium","level":3,"score":0.2833000123500824},{"id":"https://openalex.org/C135979968","wikidata":"https://www.wikidata.org/wiki/Q609809","display_name":"Protein isoform","level":5,"score":0.2825999855995178},{"id":"https://openalex.org/C2777742743","wikidata":"https://www.wikidata.org/wiki/Q19904005","display_name":"Durvalumab","level":5,"score":0.27790001034736633},{"id":"https://openalex.org/C2909186138","wikidata":"https://www.wikidata.org/wiki/Q1500373","display_name":"Hyperlactatemia","level":2,"score":0.27230000495910645},{"id":"https://openalex.org/C2779627259","wikidata":"https://www.wikidata.org/wiki/Q779763","display_name":"Pretext","level":3,"score":0.2721000015735626},{"id":"https://openalex.org/C2779645999","wikidata":"https://www.wikidata.org/wiki/Q1858477","display_name":"Frugality","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C2780904289","wikidata":"https://www.wikidata.org/wiki/Q426498","display_name":"Tantalum carbide","level":3,"score":0.2660999894142151},{"id":"https://openalex.org/C2775917601","wikidata":"https://www.wikidata.org/wiki/Q1424050","display_name":"Sclerodactyly","level":4,"score":0.26409998536109924},{"id":"https://openalex.org/C2778843634","wikidata":"https://www.wikidata.org/wiki/Q2013780","display_name":"Ocrelizumab","level":4,"score":0.2623000144958496},{"id":"https://openalex.org/C2776356786","wikidata":"https://www.wikidata.org/wiki/Q1048573","display_name":"Tubulopathy","level":3,"score":0.2558000087738037}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2503.12964","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.12964","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2503.12964","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.12964","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Video":[0],"Foundation":[1],"Models":[2],"(VFMs)":[3],"have":[4],"recently":[5],"been":[6],"used":[7],"to":[8,13],"simulate":[9],"the":[10],"real":[11],"world":[12],"train":[14],"physical":[15],"AI":[16],"systems":[17],"and":[18,58,64,80],"develop":[19],"creative":[20],"visual":[21],"experiences.":[22],"However,":[23],"there":[24],"are":[25],"significant":[26],"challenges":[27],"in":[28],"training":[29,45,63,79],"large-scale,":[30],"high":[31],"quality":[32],"VFMs":[33],"that":[34],"can":[35],"generate":[36],"high-quality":[37],"videos.":[38],"We":[39,66],"present":[40],"a":[41,69],"scalable,":[42],"open-source":[43],"VFM":[44,78],"pipeline":[46],"with":[47],"NVIDIA":[48],"NeMo,":[49],"providing":[50],"accelerated":[51],"video":[52,60],"dataset":[53],"curation,":[54],"multimodal":[55],"data":[56],"loading,":[57],"parallelized":[59],"diffusion":[61],"model":[62],"inference.":[65,81],"also":[67],"provide":[68],"comprehensive":[70],"performance":[71],"analysis":[72],"highlighting":[73],"best":[74],"practices":[75],"for":[76],"efficient":[77]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
