{"id":"https://openalex.org/W7161273632","doi":"https://doi.org/10.48550/arxiv.2605.15116","title":"DriveCtrl: Conditioned Sim-to-Real Driving Video Generation","display_name":"DriveCtrl: Conditioned Sim-to-Real Driving Video Generation","publication_year":2026,"publication_date":"2026-05-14","ids":{"openalex":"https://openalex.org/W7161273632","doi":"https://doi.org/10.48550/arxiv.2605.15116"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.15116","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.15116","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.15116","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136222382","display_name":"Haonan Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Haonan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136236202","display_name":"Yiting Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yiting","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086204373","display_name":"Jingkun Chen","orcid":"https://orcid.org/0000-0001-9466-7153"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jingkun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136207965","display_name":"Valentina Donzella","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Donzella, Valentina","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019694462","display_name":"Thomas Bashford\u2010Rogers","orcid":"https://orcid.org/0000-0003-4669-0417"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bashford-Rogers, Thomas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136241403","display_name":"Kurt Debattista","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Debattista, Kurt","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9139999747276306,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9139999747276306,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.029600000008940697,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.007199999876320362,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5583000183105469},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.48500001430511475},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.4765999913215637},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.460099995136261},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.40290001034736633},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.34209999442100525},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.3393999934196472},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.3314000070095062}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7857000231742859},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5914999842643738},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5846999883651733},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5583000183105469},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.48500001430511475},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.4765999913215637},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.460099995136261},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.40290001034736633},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.34209999442100525},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3393999934196472},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.3314000070095062},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.32280001044273376},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.3190999925136566},{"id":"https://openalex.org/C128840427","wikidata":"https://www.wikidata.org/wiki/Q1302174","display_name":"Motion compensation","level":2,"score":0.31520000100135803},{"id":"https://openalex.org/C87833898","wikidata":"https://www.wikidata.org/wiki/Q1060280","display_name":"Advanced driver assistance systems","level":2,"score":0.30720001459121704},{"id":"https://openalex.org/C151211776","wikidata":"https://www.wikidata.org/wiki/Q2778015","display_name":"Video capture","level":3,"score":0.3066999912261963},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.30660000443458557},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.305400013923645},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.288100004196167},{"id":"https://openalex.org/C158495155","wikidata":"https://www.wikidata.org/wiki/Q2369151","display_name":"Visual search","level":2,"score":0.2840999960899353},{"id":"https://openalex.org/C103910844","wikidata":"https://www.wikidata.org/wiki/Q2631256","display_name":"Video quality","level":3,"score":0.28279998898506165},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.25999999046325684}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.15116","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.15116","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.15116","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.15116","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large-scale":[0],"labelled":[1],"driving":[2,10,27,88,122,146,229],"video":[3,37,83,89,95,230],"data":[4,137],"is":[5],"essential":[6],"for":[7,33,43,66,86,174,228],"training":[8],"autonomous":[9],"systems.":[11],"Although":[12],"simulation":[13],"offers":[14],"scalable":[15,136],"and":[16,25,58,112,167,212,219],"fully":[17],"annotated":[18],"data,":[19],"the":[20,109,116,128,149,198,209,225],"domain":[21],"gap":[22,227],"between":[23],"synthetic":[24],"real-world":[26,155],"videos":[28,123,143],"significantly":[29],"limits":[30],"its":[31],"utility":[32],"downstream":[34,175],"deployment.":[35],"Existing":[36],"generation":[38,84,106,138],"methods":[39],"are":[40,64],"not":[41],"well-suited":[42],"this":[44,74,181],"task,":[45,182],"as":[46],"they":[47],"fail":[48],"to":[49],"simultaneously":[50],"preserve":[51],"scene":[52,110],"structure,":[53],"object":[54],"dynamics,":[55],"temporal":[56,217],"consistency,":[57],"visual":[59,150],"realism,":[60,216],"all":[61],"of":[62,115,152,200],"which":[63],"critical":[65],"maintaining":[67],"annotation":[68],"validity":[69],"in":[70,215],"generated":[71,201],"data.":[72],"In":[73],"paper,":[75],"we":[76,183],"present":[77],"DriveCtrl,":[78],"a":[79,93,100,135,153,185],"depth-conditioned":[80],"controllable":[81],"sim-to-real":[82,226],"framework":[85],"realistic":[87,145],"synthesis.":[90],"Built":[91],"upon":[92],"pretrained":[94],"foundation":[96],"model,":[97],"DriveCtrl":[98,206],"introduces":[99],"structure-aware":[101],"adapter":[102],"that":[103,124,140,196,205],"enables":[104],"depth-guided":[105],"while":[107,170],"preserving":[108,171],"layout":[111],"motion":[113],"patterns":[114],"source":[117],"simulation,":[118],"producing":[119],"temporally":[120],"coherent":[121],"remain":[125],"aligned":[126],"with":[127],"original":[129],"simulated":[130],"sequences.":[131],"We":[132],"further":[133],"introduce":[134],"pipeline":[139,158],"transforms":[141],"simulator":[142],"into":[144],"footage":[147],"matching":[148],"style":[151],"target":[154],"dataset.":[156],"The":[157],"supports":[159],"three":[160],"conditioning":[161],"signals:":[162],"structural":[163],"depth,":[164],"reference-dataset":[165],"style,":[166],"text":[168],"prompts,":[169],"frame-level":[172],"annotations":[173],"perception":[176,220],"tasks.":[177],"To":[178],"better":[179],"assess":[180],"propose":[184],"driving-domain-specific":[186],"knowledge-informed":[187],"evaluation":[188],"metric":[189],"called":[190],"Driving":[191],"Video":[192],"Realism":[193],"Score":[194],"(DVRS)":[195],"assesses":[197],"realism":[199],"videos.":[202],"Experiments":[203],"demonstrate":[204],"consistently":[207],"outperforms":[208],"base":[210],"model":[211],"competing":[213],"alternatives":[214],"quality,":[218],"task":[221],"performance,":[222],"substantially":[223],"narrowing":[224],"generation.":[231]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-16T00:00:00"}
