{"id":"https://openalex.org/W7131090182","doi":"https://doi.org/10.48550/arxiv.2602.18007","title":"Joint Training on AMD and NVIDIA GPUs","display_name":"Joint Training on AMD and NVIDIA GPUs","publication_year":2026,"publication_date":"2026-02-20","ids":{"openalex":"https://openalex.org/W7131090182","doi":"https://doi.org/10.48550/arxiv.2602.18007"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.18007","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126628277","display_name":"Jon Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hu, Jon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126597071","display_name":"Thomas Jia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jia, Thomas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126593539","display_name":"Jing Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Jing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126607667","display_name":"Zhendong Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Zhendong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5126628277"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.29490000009536743,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.29490000009536743,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.1290999948978424,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.11789999902248383,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/homogeneous","display_name":"Homogeneous","score":0.6766999959945679},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6207000017166138},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.6093000173568726},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5874000191688538},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5663999915122986},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.5620999932289124},{"id":"https://openalex.org/keywords/data-transmission","display_name":"Data transmission","score":0.45669999718666077},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4025999903678894}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8317999839782715},{"id":"https://openalex.org/C66882249","wikidata":"https://www.wikidata.org/wiki/Q169336","display_name":"Homogeneous","level":2,"score":0.6766999959945679},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6207000017166138},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.6093000173568726},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5874000191688538},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5663999915122986},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.5620999932289124},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4618000090122223},{"id":"https://openalex.org/C557945733","wikidata":"https://www.wikidata.org/wiki/Q389772","display_name":"Data transmission","level":2,"score":0.45669999718666077},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4025999903678894},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.3797999918460846},{"id":"https://openalex.org/C2775973920","wikidata":"https://www.wikidata.org/wiki/Q3252726","display_name":"Selection algorithm","level":3,"score":0.3474000096321106},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.33500000834465027},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3246000111103058},{"id":"https://openalex.org/C106515295","wikidata":"https://www.wikidata.org/wiki/Q26806595","display_name":"Parallel processing","level":2,"score":0.32100000977516174},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29159998893737793},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.26820001006126404},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.2524000108242035}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.18007","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.18007","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.18007","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.18007","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"As":[0],"large":[1],"language":[2],"models":[3],"continue":[4],"to":[5,73,97],"scale,":[6],"training":[7,30,109],"demands":[8],"on":[9,41,84],"compute":[10],"and":[11,52,86,111],"system":[12],"capacity":[13],"grow":[14],"rapidly,":[15],"making":[16],"single-vendor":[17],"homogeneous":[18,105],"clusters":[19],"insufficient.":[20],"This":[21],"paper":[22],"presents":[23],"a":[24,37,69],"technical":[25],"solution":[26],"for":[27],"heterogeneous":[28],"mixed":[29],"in":[31],"AMD-NVIDIA":[32],"environments.":[33],"We":[34],"first":[35],"adopt":[36],"compatibility-oriented":[38],"approach":[39,94],"based":[40],"CPU-Forwarding":[42],"Communication,":[43],"with":[44],"differentiated":[45],"communication":[46],"back-end":[47],"selection":[48],"across":[49],"parallel":[50,54],"groups":[51],"multi-NIC":[53],"data":[55,78],"transfer.":[56],"To":[57],"achieve":[58],"higher":[59],"performance,":[60],"we":[61],"further":[62],"propose":[63],"another":[64],"Device-Direct":[65,92],"Communication":[66,93],"approach,":[67],"integrating":[68],"CPU-offloading":[70],"P2P":[71],"mechanism":[72],"enable":[74],"direct":[75],"cross-vendor":[76],"GPU":[77],"transfer":[79],"without":[80],"host-memory":[81],"staging.":[82],"Experiments":[83],"LLaMA-8B":[85],"Qwen2-7B":[87],"demonstrate":[88],"that":[89],"the":[90,100],"proposed":[91],"achieves":[95],"up":[96],"98%":[98],"of":[99,102],"throughput":[101],"an":[103],"NVIDIA":[104],"system,":[106],"while":[107],"preserving":[108],"stability":[110],"correctness.":[112]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-24T00:00:00"}
