{"id":"https://openalex.org/W4408902926","doi":"https://doi.org/10.1145/3676641.3716280","title":"Vela: A Virtualized LLM Training System with GPU Direct RoCE","display_name":"Vela: A Virtualized LLM Training System with GPU Direct RoCE","publication_year":2025,"publication_date":"2025-03-27","ids":{"openalex":"https://openalex.org/W4408902926","doi":"https://doi.org/10.1145/3676641.3716280"},"language":"en","primary_location":{"id":"doi:10.1145/3676641.3716280","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3676641.3716280","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080887047","display_name":"Apoorve Mohan","orcid":"https://orcid.org/0000-0003-3789-5453"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Apoorve Mohan","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074246753","display_name":"R. E. Walkup","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Robert Walkup","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108778429","display_name":"Bengi Kara\u00e7ali","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bengi Karacali","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089889150","display_name":"Ming-Hung Chen","orcid":"https://orcid.org/0000-0002-6946-2313"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ming-hung Chen","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043483012","display_name":"Abdullah Kayi","orcid":"https://orcid.org/0000-0001-5909-9891"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abdullah Kayi","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015314206","display_name":"Liran Schour","orcid":"https://orcid.org/0000-0002-6163-0060"},"institutions":[{"id":"https://openalex.org/I4210167297","display_name":"IBM Research - Haifa","ror":"https://ror.org/05rw9t746","country_code":"IL","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210167297"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Liran Schour","raw_affiliation_strings":["IBM Research, Haifa, Israel"],"affiliations":[{"raw_affiliation_string":"IBM Research, Haifa, Israel","institution_ids":["https://openalex.org/I4210167297"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042871126","display_name":"Shweta Salaria","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shweta Salaria","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036073088","display_name":"Sophia Wen","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sophia Wen","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108479300","display_name":"I\u2010Hsin Chung","orcid":"https://orcid.org/0000-0003-4555-9257"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"I-hsin Chung","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075027799","display_name":"Md. Abdul Alim","orcid":"https://orcid.org/0000-0002-8557-6783"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abdul Alim","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090699270","display_name":"Constantinos Evangelinos","orcid":"https://orcid.org/0000-0002-7906-8651"},"institutions":[{"id":"https://openalex.org/I4210087032","display_name":"Cambridge Scientific (United States)","ror":"https://ror.org/001s4dh65","country_code":"US","type":"company","lineage":["https://openalex.org/I4210087032"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Constantinos Evangelinos","raw_affiliation_strings":["IBM Research, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210087032"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102002195","display_name":"Lixiang Luo","orcid":"https://orcid.org/0000-0001-9850-3201"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lixiang Luo","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013425123","display_name":"M.B. Dombrowa","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marc Dombrowa","raw_affiliation_strings":["IBM Research, Yortown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yortown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077866603","display_name":"Laurent Schares","orcid":"https://orcid.org/0000-0002-9506-8467"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Laurent Schares","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100493857","display_name":"Ali Sydney","orcid":"https://orcid.org/0000-0003-4410-2175"},"institutions":[{"id":"https://openalex.org/I4210087032","display_name":"Cambridge Scientific (United States)","ror":"https://ror.org/001s4dh65","country_code":"US","type":"company","lineage":["https://openalex.org/I4210087032"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ali Sydney","raw_affiliation_strings":["IBM Research, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210087032"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068041419","display_name":"Pavlos Maniotis","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pavlos Maniotis","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002971586","display_name":"Sandhya Koteshwara","orcid":"https://orcid.org/0000-0003-3182-219X"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sandhya Koteshwara","raw_affiliation_strings":["IBM Research, Yortown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yortown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109744632","display_name":"Brent Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210093186","display_name":"Cloud Pharmaceuticals (United States)","ror":"https://ror.org/00medbt67","country_code":"US","type":"company","lineage":["https://openalex.org/I4210093186"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brent Tang","raw_affiliation_strings":["IBM Cloud, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Cloud, Rochester, NY, USA","institution_ids":["https://openalex.org/I4210093186"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100493882","display_name":"Joel Belog","orcid":null},"institutions":[{"id":"https://openalex.org/I133738476","display_name":"University of Massachusetts Lowell","ror":"https://ror.org/03hamhx47","country_code":"US","type":"education","lineage":["https://openalex.org/I133738476"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joel Belog","raw_affiliation_strings":["IBM Cloud, Lowell, MA, USA"],"affiliations":[{"raw_affiliation_string":"IBM Cloud, Lowell, MA, USA","institution_ids":["https://openalex.org/I133738476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050690920","display_name":"Rei Odaira","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rei Odaira","raw_affiliation_strings":["IBM Cloud, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"IBM Cloud, Austin, TX, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045054131","display_name":"Vasily Tarasov","orcid":"https://orcid.org/0000-0003-1424-9977"},"institutions":[{"id":"https://openalex.org/I4210085935","display_name":"IBM Research - Almaden","ror":"https://ror.org/005w8dd04","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210085935","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vasily Tarasov","raw_affiliation_strings":["IBM Research, Almaden, CA, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Almaden, CA, USA","institution_ids":["https://openalex.org/I4210085935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100493833","display_name":"Eran Gampel","orcid":null},"institutions":[{"id":"https://openalex.org/I4210167297","display_name":"IBM Research - Haifa","ror":"https://ror.org/05rw9t746","country_code":"IL","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210167297"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Eran Gampel","raw_affiliation_strings":["IBM Cloud, Haifa, Israel"],"affiliations":[{"raw_affiliation_string":"IBM Cloud, Haifa, Israel","institution_ids":["https://openalex.org/I4210167297"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100493881","display_name":"Drew Thorstensen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210093186","display_name":"Cloud Pharmaceuticals (United States)","ror":"https://ror.org/00medbt67","country_code":"US","type":"company","lineage":["https://openalex.org/I4210093186"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Drew Thorstensen","raw_affiliation_strings":["IBM Cloud, Durham, NC, USA"],"affiliations":[{"raw_affiliation_string":"IBM Cloud, Durham, NC, USA","institution_ids":["https://openalex.org/I4210093186"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005109790","display_name":"Talia Gershon","orcid":"https://orcid.org/0000-0001-9408-1080"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Talia Gershon","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033227162","display_name":"Seetharami Seelam","orcid":"https://orcid.org/0000-0002-7595-3477"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Seetharami Seelam","raw_affiliation_strings":["IBM Research, Yorktown Heights, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":25,"corresponding_author_ids":["https://openalex.org/A5080887047"],"corresponding_institution_ids":["https://openalex.org/I1341412227"],"apc_list":null,"apc_paid":null,"fwci":7.3831,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.96014963,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1348","last_page":"1364"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vela","display_name":"Vela","score":0.6689286231994629},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6581199169158936},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.6118520498275757},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.5250290036201477},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.45851942896842957},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3806134760379791},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.13364526629447937},{"id":"https://openalex.org/keywords/meteorology","display_name":"Meteorology","score":0.08995065093040466}],"concepts":[{"id":"https://openalex.org/C2776444349","wikidata":"https://www.wikidata.org/wiki/Q284849","display_name":"Vela","level":3,"score":0.6689286231994629},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6581199169158936},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.6118520498275757},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.5250290036201477},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.45851942896842957},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3806134760379791},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.13364526629447937},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.08995065093040466},{"id":"https://openalex.org/C110363677","wikidata":"https://www.wikidata.org/wiki/Q4360","display_name":"Pulsar","level":2,"score":0.0},{"id":"https://openalex.org/C44870925","wikidata":"https://www.wikidata.org/wiki/Q37547","display_name":"Astrophysics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3676641.3716280","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3676641.3716280","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4000000059604645,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W95608104","https://openalex.org/W2169631286","https://openalex.org/W2985671171","https://openalex.org/W2999479397","https://openalex.org/W3197720002","https://openalex.org/W4220997874","https://openalex.org/W4312005085","https://openalex.org/W4380874786","https://openalex.org/W4391670496","https://openalex.org/W4401176521","https://openalex.org/W4401176799"],"related_works":["https://openalex.org/W2090833794","https://openalex.org/W3104828863","https://openalex.org/W1892018140","https://openalex.org/W2066659506","https://openalex.org/W2060813490","https://openalex.org/W2067883413","https://openalex.org/W4321506505","https://openalex.org/W2952598107","https://openalex.org/W4214546407","https://openalex.org/W1647360307"],"abstract_inverted_index":{"Vela":[0,26],"is":[1],"a":[2,18,81,111,134,150],"cloud-native":[3],"system":[4,79,118],"designed":[5],"for":[6,100,119],"LLM":[7],"training":[8,133],"workloads":[9],"built":[10],"using":[11,140],"off-the-shelf":[12],"hardware,":[13],"Linux":[14],"KVM-based":[15],"virtualization,":[16],"and":[17,36,71,76,93,96,143],"virtualized":[19],"RDMA":[20],"over":[21,80],"Converged":[22],"Ethernet":[23],"(RoCE)":[24],"network.":[25],"virtual":[27],"machines":[28],"(VMs)":[29],"support":[30],"peer-to-peer":[31],"DMA":[32],"between":[33],"the":[34,60,66,78,87,97,117,129,154],"GPUs":[35],"SRIOV-based":[37],"network":[38],"interface.":[39],"In":[40],"this":[41],"paper,":[42,67],"we":[43,68,115,125],"share":[44,69],"Vela's":[45],"key":[46],"architectural":[47],"aspects":[48],"with":[49,153],"details":[50],"from":[51,73],"an":[52],"NVIDIA":[53],"A100":[54],"GPU-based":[55],"deployment":[56],"in":[57],"one":[58],"of":[59,89,128],"IBM":[61],"Cloud":[62],"data":[63],"centers.":[64],"Throughout":[65],"insights":[70],"experiences":[72],"designing,":[74],"building,":[75],"operating":[77],"~2.5":[82],"year":[83],"timeframe":[84],"to":[85,110,149],"highlight":[86],"capabilities":[88],"readily":[90],"available":[91],"software":[92],"hardware":[94],"technologies":[95],"improvement":[98],"opportunities":[99],"future":[101],"AI":[102,106],"systems,":[103],"thereby":[104],"making":[105],"infrastructure":[107],"more":[108],"accessible":[109],"broader":[112],"community.":[113],"As":[114],"evaluated":[116],"performance":[120],"at":[121],"~1500":[122],"GPU":[123,146],"scale,":[124],"achieved":[126],"~80%":[127],"ideal":[130],"throughput":[131],"while":[132],"50":[135],"billion":[136],"parameter":[137],"decoder":[138],"model":[139,141],"parallelism,":[142],"~70%":[144],"per":[145],"FLOPS":[147],"compared":[148],"single":[151],"VM":[152],"High-Performance":[155],"Linpack":[156],"benchmark.":[157]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
