{"id":"https://openalex.org/W4417539368","doi":"https://doi.org/10.1109/iccv51701.2025.00954","title":"Controllable and Expressive One-Shot Video Head Swapping","display_name":"Controllable and Expressive One-Shot Video Head Swapping","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4417539368","doi":"https://doi.org/10.1109/iccv51701.2025.00954"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.00954","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.00954","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2506.16852","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Chaonan Ji","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chaonan Ji","raw_affiliation_strings":["Alibaba Group"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jinwei Qi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinwei Qi","raw_affiliation_strings":["Alibaba Group"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Peng Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peng Zhang","raw_affiliation_strings":["Alibaba Group"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Bang Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bang Zhang","raw_affiliation_strings":["Alibaba Group"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"last","author":{"id":null,"display_name":"Liefeng Bo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Liefeng Bo","raw_affiliation_strings":["Alibaba Group"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210095624"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.39700531,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"10239","last_page":"10250"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.47839999198913574,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.47839999198913574,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.3937000036239624,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.04600000008940697,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/retargeting","display_name":"Retargeting","score":0.961899995803833},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.6039000153541565},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5867999792098999},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.5626000165939331},{"id":"https://openalex.org/keywords/seam-carving","display_name":"Seam carving","score":0.527999997138977},{"id":"https://openalex.org/keywords/head","display_name":"Head (geology)","score":0.5189999938011169},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.45660001039505005},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.4433000087738037}],"concepts":[{"id":"https://openalex.org/C2780575108","wikidata":"https://www.wikidata.org/wiki/Q7316652","display_name":"Retargeting","level":2,"score":0.961899995803833},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7853999733924866},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6841999888420105},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.6039000153541565},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5867999792098999},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5755000114440918},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.5626000165939331},{"id":"https://openalex.org/C23746413","wikidata":"https://www.wikidata.org/wiki/Q1141379","display_name":"Seam carving","level":3,"score":0.527999997138977},{"id":"https://openalex.org/C2780312720","wikidata":"https://www.wikidata.org/wiki/Q5689100","display_name":"Head (geology)","level":2,"score":0.5189999938011169},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.45660001039505005},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.4433000087738037},{"id":"https://openalex.org/C2780297707","wikidata":"https://www.wikidata.org/wiki/Q4895393","display_name":"Landmark","level":2,"score":0.4239000082015991},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.39410001039505005},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.362199991941452},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3285999894142151},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.3285999894142151},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3070000112056732},{"id":"https://openalex.org/C2780549717","wikidata":"https://www.wikidata.org/wiki/Q3409626","display_name":"Human head","level":3,"score":0.2892000079154968},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.26919999718666077},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.26750001311302185}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.00954","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.00954","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2506.16852","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.16852","pdf_url":"https://arxiv.org/pdf/2506.16852","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2506.16852","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2506.16852","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2506.16852","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.16852","pdf_url":"https://arxiv.org/pdf/2506.16852","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"propose":[4,111,151],"a":[5,18,22,26,101,112,152,178],"novel":[6],"diffusion-based":[7],"multi-condition":[8],"controllable":[9],"framework":[10],"for":[11,190],"video":[12],"head":[13,20,44,63,86,120,134,162],"swapping,":[14],"which":[15],"seamlessly":[16],"transplant":[17],"human":[19],"from":[21,123],"static":[23],"image":[24],"into":[25],"dynamic":[27],"video,":[28,38],"while":[29,65,205],"preserving":[30,206],"the":[31,84,165,207,210],"original":[32,168],"body":[33],"and":[34,39,46,72,75,141,148,161,173,225],"background":[35,203],"of":[36,77,167,209],"target":[37],"further":[40,183],"allowing":[41],"to":[42,82,116,130,185,222],"tweak":[43],"expressions":[45,87,169],"movements":[47],"during":[48],"swapping":[49],"as":[50,213,215],"needed.":[51],"Existing":[52],"face-swapping":[53],"methods":[54,79],"mainly":[55],"focus":[56],"on":[57],"localized":[58],"facial":[59],"replacement":[60],"neglecting":[61],"holistic":[62,133],"morphology,":[64],"head-swapping":[66],"approaches":[67],"struggling":[68],"with":[69],"hairstyle":[70],"diversity":[71],"complex":[73,142],"backgrounds,":[74],"none":[76],"these":[78,92],"allow":[80],"users":[81],"modify":[83],"transplanted":[85],"after":[88],"swapping.":[89],"To":[90],"tackle":[91],"challenges,":[93],"our":[94,198],"method":[95,199],"incorporates":[96],"several":[97],"innovative":[98],"strategies":[99],"through":[100],"unified":[102],"latent":[103],"diffusion":[104],"paradigm.":[105],"1)":[106],"Identity-preserving":[107],"context":[108],"fusion:":[109],"We":[110,150],"shape-agnostic":[113],"mask":[114],"strategy":[115,129,181],"explicitly":[117],"disentangle":[118],"foreground":[119],"identity":[121,135,208],"features":[122],"background/body":[124],"contexts,":[125],"combining":[126],"hair":[127,139],"enhancement":[128],"achieve":[131],"robust":[132],"preservation":[136],"across":[137],"diverse":[138],"types":[140],"backgrounds.":[143],"2)":[144],"Expression-aware":[145],"landmark":[146],"retargeting":[147,155,180],"editing:":[149],"disentangled":[153],"3DMM-driven":[154],"module":[156],"that":[157,197],"decouples":[158],"identity,":[159],"expression,":[160],"poses,":[163],"minimizing":[164],"impact":[166],"in":[170,201],"input":[171],"images":[172],"supporting":[174],"expression":[175,188,218],"editing.":[176],"While":[177],"scale-aware":[179],"is":[182],"employed":[184],"minimize":[186],"cross-identity":[187],"distortion":[189],"higher":[191],"transfer":[192,219],"precision.":[193],"Experimental":[194],"results":[195],"demonstrate":[196],"excels":[200],"seamless":[202],"integration":[204],"source":[211],"portrait,":[212],"well":[214],"showcasing":[216],"superior":[217],"capabilities":[220],"applicable":[221],"both":[223],"real":[224],"virtual":[226],"characters.":[227]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-10T00:00:00"}
