{"id":"https://openalex.org/W4405173645","doi":"https://doi.org/10.1109/iccv51701.2025.02361","title":"UniMLVG: Unified Framework for Multi-View Long Video Generation with Comprehensive Control Capabilities for Autonomous Driving","display_name":"UniMLVG: Unified Framework for Multi-View Long Video Generation with Comprehensive Control Capabilities for Autonomous Driving","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4405173645","doi":"https://doi.org/10.1109/iccv51701.2025.02361"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.02361","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02361","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2412.04842","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100403640","display_name":"Rui Chen","orcid":"https://orcid.org/0000-0002-4041-4131"},"institutions":[{"id":"https://openalex.org/I2801975132","display_name":"Ocean Institute","ror":"https://ror.org/03574zr06","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I2801975132"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Rui Chen","raw_affiliation_strings":["Advanced Ocean Institute, Southeast University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Advanced Ocean Institute, Southeast University,China","institution_ids":["https://openalex.org/I2801975132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034234007","display_name":"Zehuan Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zehuan Wu","raw_affiliation_strings":["SenseTime Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SenseTime Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100605128","display_name":"Yichen Liu","orcid":"https://orcid.org/0000-0001-7844-7337"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yichen Liu","raw_affiliation_strings":["SenseTime Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SenseTime Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085786931","display_name":"Yuxin Guo","orcid":"https://orcid.org/0000-0002-4244-1408"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuxin Guo","raw_affiliation_strings":["SenseTime Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SenseTime Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078698697","display_name":"Jingcheng Ni","orcid":"https://orcid.org/0000-0001-6276-6296"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jingcheng Ni","raw_affiliation_strings":["SenseTime Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SenseTime Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051260936","display_name":"Haifeng Xia","orcid":"https://orcid.org/0000-0002-1005-1894"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haifeng Xia","raw_affiliation_strings":["School of Automation, Southeast University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Automation, Southeast University,China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025016953","display_name":"Siyu Xia","orcid":"https://orcid.org/0000-0002-0953-6501"},"institutions":[{"id":"https://openalex.org/I2801975132","display_name":"Ocean Institute","ror":"https://ror.org/03574zr06","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I2801975132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siyu Xia","raw_affiliation_strings":["Advanced Ocean Institute, Southeast University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Advanced Ocean Institute, Southeast University,China","institution_ids":["https://openalex.org/I2801975132"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100403640"],"corresponding_institution_ids":["https://openalex.org/I2801975132"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00111356,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"25453","last_page":"25463"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.9861999750137329,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10741","display_name":"Video Coding and Compression Technologies","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.627323567867279},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5914075374603271},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.33450984954833984},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.30291157960891724},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.19084519147872925}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.627323567867279},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5914075374603271},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.33450984954833984},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30291157960891724},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.19084519147872925}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.02361","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02361","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2412.04842","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.04842","pdf_url":"https://arxiv.org/pdf/2412.04842","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2412.04842","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2412.04842","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2412.04842","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.04842","pdf_url":"https://arxiv.org/pdf/2412.04842","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5947948557","display_name":null,"funder_award_id":"62406068","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0],"creation":[1],"of":[2,17,89,113,157],"diverse":[3],"and":[4,14,55,73,87,161],"realistic":[5],"driving":[6,20,27,57],"scenarios":[7],"has":[8],"become":[9],"essential":[10],"to":[11,43,106,131,146],"enhance":[12],"perception":[13],"planning":[15],"capabilities":[16],"the":[18,60,85,132,147],"autonomous":[19],"system.":[21],"However,":[22],"generating":[23],"long-duration,":[24],"surround-view":[25],"consistent":[26],"videos":[28,48,58,129],"remains":[29],"a":[30,39,66],"significant":[31],"challenge.":[32],"To":[33],"address":[34],"this,":[35],"we":[36,94],"present":[37],"UniMLVG,":[38],"unified":[40],"framework":[41,154],"designed":[42],"generate":[44],"extended":[45],"street":[46],"multi-perspective":[47],"under":[49],"precise":[50],"control.":[51],"By":[52],"integrating":[53],"single-":[54],"multi-view":[56,103,128],"into":[59],"training":[61,81],"data,":[62],"our":[63,124,153],"approach":[64,101],"updates":[65],"DiT-based":[67],"diffusion":[68],"model":[69],"equipped":[70],"with":[71,79,150],"cross-frame":[72],"cross-view":[74],"modules":[75],"across":[76],"three":[77],"stages":[78],"multi":[80],"objectives,":[82],"substantially":[83],"boosting":[84],"diversity":[86],"quality":[88],"generated":[90],"visual":[91],"content.":[92],"Importantly,":[93],"propose":[95],"an":[96],"innovative":[97],"explicit":[98],"viewpoint":[99],"modeling":[100],"for":[102],"video":[104],"generation":[105],"effectively":[107],"improve":[108],"motion":[109],"transition":[110],"consistency.":[111],"Capable":[112],"handling":[114],"various":[115],"input":[116],"reference":[117],"formats":[118],"(e.g.,":[119],"text,":[120],"images,":[121],"or":[122,141],"video),":[123],"UniMLVG":[125],"generates":[126],"high-quality":[127],"according":[130],"corresponding":[133],"condition":[134],"constraints":[135],"such":[136],"as":[137],"3D":[138],"bounding":[139],"boxes":[140],"frame-level":[142],"text":[143],"descriptions.":[144],"Compared":[145],"best":[148],"models":[149],"similar":[151],"capabilities,":[152],"achieves":[155],"improvements":[156],"48.2%":[158],"in":[159,163],"FID":[160],"35.2%":[162],"FVD.":[164]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-10T00:00:00"}
