{"id":"https://openalex.org/W4407690164","doi":"https://doi.org/10.1109/iccv51701.2025.01387","title":"Phantom: Subject-Consistent Video Generation via Cross-Modal Alignment","display_name":"Phantom: Subject-Consistent Video Generation via Cross-Modal Alignment","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4407690164","doi":"https://doi.org/10.1109/iccv51701.2025.01387"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.01387","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01387","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2502.11079","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100632236","display_name":"Lijie Liu","orcid":"https://orcid.org/0009-0004-4146-6808"},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Lijie Liu","raw_affiliation_strings":["ByteDance,Intelligent Creation Team"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance,Intelligent Creation Team","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101775230","display_name":"Tianxiang Ma","orcid":"https://orcid.org/0009-0008-6181-3634"},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Tianxiang Ma","raw_affiliation_strings":["ByteDance,Intelligent Creation Team"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance,Intelligent Creation Team","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023195809","display_name":"Benjamin Li","orcid":"https://orcid.org/0000-0002-1783-7013"},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Bingchuan Li","raw_affiliation_strings":["ByteDance,Intelligent Creation Team"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance,Intelligent Creation Team","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056344041","display_name":"Zhuowei Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zhuowei Chen","raw_affiliation_strings":["ByteDance,Intelligent Creation Team"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance,Intelligent Creation Team","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100320739","display_name":"Jiawei Liu","orcid":"https://orcid.org/0000-0001-9940-6366"},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jiawei Liu","raw_affiliation_strings":["ByteDance,Intelligent Creation Team"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance,Intelligent Creation Team","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Gen Li","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Gen Li","raw_affiliation_strings":["ByteDance,Intelligent Creation Team"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance,Intelligent Creation Team","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Siyu Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Siyu Zhou","raw_affiliation_strings":["ByteDance,Intelligent Creation Team"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance,Intelligent Creation Team","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Qian He","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Qian He","raw_affiliation_strings":["ByteDance,Intelligent Creation Team"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance,Intelligent Creation Team","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"last","author":{"id":null,"display_name":"Xinglong Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Xinglong Wu","raw_affiliation_strings":["ByteDance,Intelligent Creation Team"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance,Intelligent Creation Team","institution_ids":["https://openalex.org/I4210153682"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5100632236"],"corresponding_institution_ids":["https://openalex.org/I4210153682"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.01847322,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"14951","last_page":"14961"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.7077001333236694},{"id":"https://openalex.org/keywords/imaging-phantom","display_name":"Imaging phantom","score":0.6655080318450928},{"id":"https://openalex.org/keywords/subject","display_name":"Subject (documents)","score":0.5304210782051086},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.51357102394104},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.28031229972839355},{"id":"https://openalex.org/keywords/optics","display_name":"Optics","score":0.16850066184997559},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.11461469531059265}],"concepts":[{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.7077001333236694},{"id":"https://openalex.org/C104293457","wikidata":"https://www.wikidata.org/wiki/Q28324852","display_name":"Imaging phantom","level":2,"score":0.6655080318450928},{"id":"https://openalex.org/C2777855551","wikidata":"https://www.wikidata.org/wiki/Q12310021","display_name":"Subject (documents)","level":2,"score":0.5304210782051086},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.51357102394104},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.28031229972839355},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.16850066184997559},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.11461469531059265},{"id":"https://openalex.org/C161191863","wikidata":"https://www.wikidata.org/wiki/Q199655","display_name":"Library science","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.01387","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01387","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2502.11079","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2502.11079","pdf_url":"https://arxiv.org/pdf/2502.11079","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2502.11079","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2502.11079","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2502.11079","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2502.11079","pdf_url":"https://arxiv.org/pdf/2502.11079","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4388258507","https://openalex.org/W2392013855","https://openalex.org/W2417440389","https://openalex.org/W4244157427","https://openalex.org/W2734382758","https://openalex.org/W4385556839","https://openalex.org/W4378746257"],"abstract_inverted_index":{"The":[0,112],"continuous":[1],"development":[2],"of":[3,48,56,123],"foundational":[4],"models":[5],"for":[6,81],"video":[7,16,78,118,154],"generation":[8,17,79,119,155],"is":[9],"evolving":[10],"into":[11],"various":[12],"applications,":[13],"with":[14],"subject-consistent":[15,38,117],"still":[18],"in":[19,51,148],"the":[20,46,53,96],"exploratory":[21],"stage.":[22],"We":[23,43],"refer":[24],"to":[25,104],"this":[26,71],"as":[27],"Subject-to-Video,":[28],"which":[29],"extracts":[30],"subject":[31,146],"elements":[32],"from":[33],"reference":[34],"images":[35],"and":[36,58,62,67,84,91,101,127],"generates":[37],"videos":[39],"following":[40],"textual":[41],"instructions.":[42],"believe":[44],"that":[45,133],"essence":[47],"subject-to-video":[49],"lies":[50],"balancing":[52],"dual-modal":[54],"prompts":[55],"text":[57,66],"image,":[59],"thereby":[60],"deeply":[61],"simultaneously":[63],"aligning":[64],"both":[65,82],"visual":[68],"content.":[69],"To":[70],"end,":[72],"we":[73,94,144],"propose":[74],"Phantom,":[75],"a":[76],"unified":[77],"framework":[80],"single-":[83],"multi-subject":[85,128],"references.":[86],"Building":[87],"on":[88],"existing":[89,152],"text-to-video":[90],"image-to-video":[92],"architectures,":[93],"redesign":[95],"joint":[97],"text-image":[98],"injection":[99],"model":[100],"drive":[102],"it":[103],"learn":[105],"cross-modal":[106],"alignment":[107],"via":[108],"text-image-video":[109],"triplet":[110],"data.":[111],"proposed":[113],"method":[114,135],"achieves":[115],"high-fidelity":[116],"while":[120,156],"addressing":[121],"issues":[122],"image":[124],"content":[125],"leakage":[126],"confusion.":[129],"Evaluation":[130],"results":[131],"indicate":[132],"our":[134],"outperforms":[136],"other":[137],"state-of-the-art":[138],"closed-source":[139],"commercial":[140],"solutions.":[141],"In":[142],"particular,":[143],"emphasize":[145],"consistency":[147],"human":[149],"generation,":[150],"covering":[151],"ID-preserving":[153],"offering":[157],"enhanced":[158],"advantages.":[159]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-10T00:00:00"}
