{"id":"https://openalex.org/W4403791702","doi":"https://doi.org/10.1145/3664647.3680975","title":"Audio-Driven Identity Manipulation for Face Inpainting","display_name":"Audio-Driven Identity Manipulation for Face Inpainting","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791702","doi":"https://doi.org/10.1145/3664647.3680975"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3680975","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680975","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110574075","display_name":"Yuqi Sun","orcid":"https://orcid.org/0000-0002-7179-5045"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuqi Sun","raw_affiliation_strings":["Shanghai Key Laboratory of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037937279","display_name":"Qing Lin","orcid":"https://orcid.org/0000-0002-3808-3492"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Qing Lin","raw_affiliation_strings":["I2R and CFAR, Agency for Science, Technology and Research (A*STAR), Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"I2R and CFAR, Agency for Science, Technology and Research (A*STAR), Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I115228651","https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048621299","display_name":"Weimin Tan","orcid":"https://orcid.org/0000-0001-7677-4772"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weimin Tan","raw_affiliation_strings":["Shanghai Key Laboratory of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081115024","display_name":"Bo Yan","orcid":"https://orcid.org/0000-0003-0256-9682"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Yan","raw_affiliation_strings":["Shanghai Key Laboratory of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5110574075"],"corresponding_institution_ids":["https://openalex.org/I24943067"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19219129,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"6123","last_page":"6132"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.9797999858856201,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inpainting","display_name":"Inpainting","score":0.7521955370903015},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.7024309635162354},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6115672588348389},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.6017748117446899},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4514042139053345},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4151412546634674},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.32618382573127747},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.2727130651473999},{"id":"https://openalex.org/keywords/aesthetics","display_name":"Aesthetics","score":0.22288191318511963},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.198043555021286},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10447406768798828},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.062178075313568115}],"concepts":[{"id":"https://openalex.org/C11727466","wikidata":"https://www.wikidata.org/wiki/Q1628157","display_name":"Inpainting","level":3,"score":0.7521955370903015},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.7024309635162354},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6115672588348389},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.6017748117446899},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4514042139053345},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4151412546634674},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.32618382573127747},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.2727130651473999},{"id":"https://openalex.org/C107038049","wikidata":"https://www.wikidata.org/wiki/Q35986","display_name":"Aesthetics","level":1,"score":0.22288191318511963},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.198043555021286},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10447406768798828},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.062178075313568115}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3680975","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680975","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W46096063","https://openalex.org/W1648544472","https://openalex.org/W1992178755","https://openalex.org/W1995387737","https://openalex.org/W2066078528","https://openalex.org/W2085999891","https://openalex.org/W2138719476","https://openalex.org/W2169180857","https://openalex.org/W2187089797","https://openalex.org/W2227324914","https://openalex.org/W2325939864","https://openalex.org/W2536626143","https://openalex.org/W2754125855","https://openalex.org/W2791762836","https://openalex.org/W2922538097","https://openalex.org/W2944294033","https://openalex.org/W2963290645","https://openalex.org/W2963770578","https://openalex.org/W2963839617","https://openalex.org/W2970903655","https://openalex.org/W2979157532","https://openalex.org/W2981087920","https://openalex.org/W2982058372","https://openalex.org/W2989207674","https://openalex.org/W3000996870","https://openalex.org/W3034192864","https://openalex.org/W3035512475","https://openalex.org/W3096831136","https://openalex.org/W3136958399","https://openalex.org/W3186090335","https://openalex.org/W3197199219","https://openalex.org/W3199474181","https://openalex.org/W3207671158","https://openalex.org/W3215628224","https://openalex.org/W4200386655","https://openalex.org/W4256104473"],"related_works":["https://openalex.org/W2380775572","https://openalex.org/W2213520135","https://openalex.org/W2244018504","https://openalex.org/W4242046654","https://openalex.org/W3174923100","https://openalex.org/W4226517663","https://openalex.org/W4308234225","https://openalex.org/W2162778274","https://openalex.org/W2999351684","https://openalex.org/W4200625923"],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,24,86,223],"multimodal":[3],"artificial":[4],"intelligence":[5],"have":[6],"greatly":[7],"improved":[8],"the":[9,16,32,114,118,123,134,167,187,192,209],"integration":[10],"of":[11,80,117,212],"vision-language-audio":[12],"cues":[13],"to":[14,36,128,150,164,176],"enrich":[15],"content":[17],"creation":[18],"process.":[19],"Inspired":[20],"by":[21],"these":[22],"developments,":[23],"this":[25],"paper,":[26],"we":[27,90,171],"first":[28],"integrate":[29],"audio":[30,71,102,124,180,184,217],"into":[31,195],"face":[33,64,87,98,105,181,196,219],"inpainting":[34,197,220],"task":[35],"facilitate":[37],"identity":[38,50,68,81,84,131,138,155,160,214,224],"manipulation.":[39],"Our":[40],"main":[41],"insight":[42],"is":[43,107,126],"that":[44,157],"a":[45,92,97,146,152],"person's":[46],"voice":[47],"carries":[48],"distinct":[49],"markers,":[51],"such":[52],"as":[53,72],"age":[54],"and":[55,83,100,185],"gender,":[56],"which":[57],"provide":[58],"an":[59,101,173,178,200],"essential":[60],"supplement":[61],"for":[62],"identity-aware":[63],"inpainting.":[65,88],"By":[66],"extracting":[67,110,213],"information":[69,112,168,215],"from":[70,113,133,140,169,183,191,216],"guidance,":[73],"our":[74],"method":[75],"can":[76],"naturally":[77],"support":[78],"tasks":[79],"preservation":[82],"swapping":[85],"Specifically,":[89],"introduce":[91,172],"dual-stream":[93],"network":[94,198],"architecture":[95],"comprising":[96],"branch":[99,106,125],"branch.":[103],"The":[104,137],"tasked":[108],"with":[109],"deterministic":[111],"visible":[115],"parts":[116],"input":[119],"masked":[120],"face,":[121],"while":[122],"designed":[127],"capture":[129],"heuristic":[130],"priors":[132],"speaker's":[135],"voice.":[136],"codes":[139],"two":[141],"streams":[142],"are":[143],"integrated":[144],"using":[145],"multi-layer":[147],"perceptron":[148],"(MLP)":[149],"create":[151],"virtual":[153],"unified":[154],"embedding":[156],"represennts":[158],"comprehensive":[159],"features.":[161],"In":[162],"addition,":[163],"explicitly":[165],"exploit":[166],"audio,":[170],"audio-face":[174,193],"generator":[175,194],"generate":[177],"'fake'":[179],"directly":[182],"fuse":[186],"multi-scale":[188],"intermediate":[189],"features":[190],"through":[199],"audio-visual":[201],"feature":[202],"fusion":[203],"(AVFF)":[204],"module.":[205],"Extensive":[206],"experiments":[207],"demonstrate":[208],"positive":[210],"impact":[211],"on":[218],"task,":[221],"especially":[222],"preservation.":[225]},"counts_by_year":[],"updated_date":"2025-12-19T19:40:27.379048","created_date":"2025-10-10T00:00:00"}
