{"id":"https://openalex.org/W7162779873","doi":"https://doi.org/10.48550/arxiv.2605.29471","title":"V2XCrafter: Learning to Generate Driving Scene Across Agents","display_name":"V2XCrafter: Learning to Generate Driving Scene Across Agents","publication_year":2026,"publication_date":"2026-05-28","ids":{"openalex":"https://openalex.org/W7162779873","doi":"https://doi.org/10.48550/arxiv.2605.29471"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.29471","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.29471","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.29471","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137394039","display_name":"Yihang Tao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao, Yihang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137392999","display_name":"Yu Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137346567","display_name":"Senkang Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Senkang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137370938","display_name":"Yanan Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Yanan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137355258","display_name":"Zihan Fang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Zihan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137342017","display_name":"Sam Kwong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kwong, Sam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5137323222","display_name":"Yuguang Fang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Yuguang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.32440000772476196,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.32440000772476196,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.16359999775886536,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.11479999870061874,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7549999952316284},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6187999844551086},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5126000046730042},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.45840001106262207},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4366999864578247},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4260999858379364},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4235999882221222},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.3472000062465668},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.32690000534057617}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7549999952316284},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7468000054359436},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6187999844551086},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5379999876022339},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5126000046730042},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.45840001106262207},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4366999864578247},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4260999858379364},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4235999882221222},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41990000009536743},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.38929998874664307},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.3472000062465668},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3310000002384186},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.32690000534057617},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.3260999917984009},{"id":"https://openalex.org/C138020889","wikidata":"https://www.wikidata.org/wiki/Q2349659","display_name":"Collaborative learning","level":2,"score":0.3237000107765198},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.31790000200271606},{"id":"https://openalex.org/C179372163","wikidata":"https://www.wikidata.org/wiki/Q1406181","display_name":"Scene graph","level":3,"score":0.296999990940094},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.2946000099182129},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.29089999198913574},{"id":"https://openalex.org/C37279795","wikidata":"https://www.wikidata.org/wiki/Q2492305","display_name":"Consistency model","level":3,"score":0.2874000072479248},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C2779038628","wikidata":"https://www.wikidata.org/wiki/Q7248497","display_name":"Programming by demonstration","level":3,"score":0.27730000019073486},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2761000096797943},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2605000138282776},{"id":"https://openalex.org/C108882727","wikidata":"https://www.wikidata.org/wiki/Q2991685","display_name":"Solid modeling","level":2,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.29471","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.29471","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.29471","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.29471","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Collaborative":[0],"driving":[1,13,24,31,57,108],"systems":[2],"leverage":[3],"vehicle-to-everything":[4],"(V2X)":[5],"communication":[6],"for":[7,41,47,82,102],"multi-agent":[8,56,121],"collaborative":[9,107,194],"perception":[10],"to":[11,137,165],"enhance":[12],"safety,":[14],"yet":[15],"they":[16],"remain":[17],"constrained":[18],"by":[19],"scarce":[20],"annotated":[21],"real-world":[22],"V2X":[23],"datasets":[25],"and":[26,69,105,159,181],"limited":[27],"generalization":[28],"across":[29,75,110,187],"diverse":[30],"conditions.":[32],"While":[33],"image":[34],"generation":[35,67],"technology":[36],"offers":[37],"a":[38,119,126,149,155],"feasible":[39],"solution":[40],"data":[42],"augmentation,":[43],"existing":[44],"methods":[45],"tailored":[46],"single-vehicle":[48],"multi-view":[49],"scenarios":[50],"face":[51],"two":[52],"fundamental":[53],"challenges":[54],"in":[55,88],"settings:":[58],"(1)":[59],"the":[60,63,71,78,99,140,167,192],"expansion":[61],"of":[62,80],"learning":[64],"objective":[65],"degrades":[66],"quality,":[68],"(2)":[70],"highly":[72],"dynamic":[73,168],"variations":[74],"agents":[76],"hinder":[77],"modeling":[79],"consistency":[81,186],"physical":[83],"attributes":[84],"(e.g.,":[85],"color,":[86],"category)":[87],"jointly":[89,161],"observed":[90,162],"objects.":[91],"To":[92,143],"bridge":[93],"this":[94],"gap,":[95],"we":[96,117,147],"propose":[97,148],"V2XCrafter,":[98],"first":[100],"framework":[101],"generating":[103],"controllable":[104,182],"realistic":[106],"scene":[109],"agents'":[111,131],"camera":[112,170],"views.":[113],"For":[114],"effective":[115],"learning,":[116],"develop":[118],"progressive":[120],"diffusion":[122],"model":[123,166],"based":[124],"on":[125],"single-agent":[127],"backbone,":[128],"using":[129],"neighboring":[130],"latent":[132],"states":[133],"as":[134],"reference":[135],"signals":[136],"progressively":[138],"guide":[139],"single-to-multi":[141],"diffusion.":[142],"address":[144],"cross-vehicle":[145],"inconsistency,":[146],"cross-agent":[150,169],"attention":[151],"module":[152],"that":[153,176],"leverages":[154],"collaboration":[156],"view":[157,171],"graph":[158],"learnable":[160],"object":[163,196],"representation":[164],"relationships.":[172],"Experiments":[173],"have":[174],"shown":[175],"V2XCrafter":[177],"can":[178],"generate":[179],"high-fidelity":[180],"street":[183],"views":[184],"with":[185],"agents,":[188],"thereby":[189],"effectively":[190],"enhancing":[191],"downstream":[193],"3D":[195],"detection":[197],"tasks.":[198]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-30T00:00:00"}
