{"id":"https://openalex.org/W4408355450","doi":"https://doi.org/10.1109/icassp49660.2025.10887957","title":"FCConDubber: Fine And Coarse Grained Prosody Alignment For Expressive Video Dubbing via Contrastive Audio-Motion Pretraining","display_name":"FCConDubber: Fine And Coarse Grained Prosody Alignment For Expressive Video Dubbing via Contrastive Audio-Motion Pretraining","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408355450","doi":"https://doi.org/10.1109/icassp49660.2025.10887957"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10887957","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10887957","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087333633","display_name":"Qiulin Li","orcid":null},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qiulin Li","raw_affiliation_strings":["Nanjing University of Aeronautics and Astronautics,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University of Aeronautics and Astronautics,Nanjing,China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087449078","display_name":"Zhichao Wu","orcid":"https://orcid.org/0000-0002-1195-8834"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhichao Wu","raw_affiliation_strings":["Nanjing University of Aeronautics and Astronautics,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University of Aeronautics and Astronautics,Nanjing,China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101429772","display_name":"Hanwei Li","orcid":"https://orcid.org/0000-0001-7863-7165"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hanwei Li","raw_affiliation_strings":["Nanjing University of Aeronautics and Astronautics,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University of Aeronautics and Astronautics,Nanjing,China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101708485","display_name":"Xin Dong","orcid":"https://orcid.org/0000-0001-7872-6846"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Dong","raw_affiliation_strings":["Nanjing University of Aeronautics and Astronautics,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University of Aeronautics and Astronautics,Nanjing,China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022836264","display_name":"Qun Yang","orcid":"https://orcid.org/0000-0001-6824-8473"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qun Yang","raw_affiliation_strings":["Nanjing University of Aeronautics and Astronautics,Nanjing,China"],"affiliations":[{"raw_affiliation_string":"Nanjing University of Aeronautics and Astronautics,Nanjing,China","institution_ids":["https://openalex.org/I9842412"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5087333633"],"corresponding_institution_ids":["https://openalex.org/I9842412"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05680627,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9786999821662903,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.7307595610618591},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6937590837478638},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.507984459400177},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.5033723711967468},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3872528076171875}],"concepts":[{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.7307595610618591},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6937590837478638},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.507984459400177},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.5033723711967468},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3872528076171875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10887957","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10887957","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W2107740512","https://openalex.org/W2107860279","https://openalex.org/W2130086727","https://openalex.org/W2905016804","https://openalex.org/W2964449965","https://openalex.org/W3016011581","https://openalex.org/W3035390927","https://openalex.org/W3035626590","https://openalex.org/W3096918678","https://openalex.org/W3099284785","https://openalex.org/W3150572638","https://openalex.org/W3180794345","https://openalex.org/W3204420730","https://openalex.org/W3209059054","https://openalex.org/W3216976702","https://openalex.org/W4225956675","https://openalex.org/W4294435344","https://openalex.org/W4312382935","https://openalex.org/W4312453532","https://openalex.org/W4382202685","https://openalex.org/W4385823407","https://openalex.org/W4391021646","https://openalex.org/W4392910613","https://openalex.org/W4393152865","https://openalex.org/W6748854608","https://openalex.org/W6763832098","https://openalex.org/W6764679822","https://openalex.org/W6778823374","https://openalex.org/W6802142237","https://openalex.org/W6803063772","https://openalex.org/W6803359641","https://openalex.org/W6803547063","https://openalex.org/W6843731886","https://openalex.org/W6844194202","https://openalex.org/W6845479124","https://openalex.org/W6846770672","https://openalex.org/W6849937806","https://openalex.org/W6853165267"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2355553914","https://openalex.org/W149862513","https://openalex.org/W2347684782","https://openalex.org/W187117048","https://openalex.org/W4320472397","https://openalex.org/W2401269021","https://openalex.org/W2145654520"],"abstract_inverted_index":{"Automatic":[0],"Video":[1],"Dubbing":[2],"(AVD)":[3],"aims":[4],"to":[5,27,36,68],"synthesize":[6],"speech":[7,96,112],"that":[8,61,105],"matches":[9],"a":[10,57,63],"character\u2019s":[11],"speaking":[12],"style":[13,77],"and":[14,41,46,74,87,115],"emotion":[15],"in":[16,111],"silent":[17],"video":[18],"clips.":[19],"However,":[20],"existing":[21],"approaches":[22],"rely":[23],"on":[24,100],"attention":[25],"mechanisms":[26],"learn":[28,69],"cross-modal":[29],"prosodic":[30,39,72],"alignment":[31,73],"implicitly,":[32],"making":[33],"it":[34],"challenging":[35],"capture":[37],"subtle":[38],"variations":[40],"impacting":[42],"the":[43,49,82,101],"overall":[44],"naturalness":[45],"expressiveness":[47],"of":[48,94],"output.":[50],"In":[51],"this":[52],"paper,":[53],"we":[54,80],"propose":[55],"FCConDubber,":[56],"novel":[58],"dubbing":[59],"model":[60],"incorporates":[62],"contrastive":[64],"speech-motion":[65],"pre-training":[66],"framework":[67],"fine-grained":[70],"temporal":[71],"coarse-grained":[75],"global":[76],"information.":[78],"Furthermore,":[79],"explore":[81],"relationship":[83],"between":[84],"facial":[85],"features":[86,89],"audio":[88],"extracted":[90],"from":[91],"different":[92],"layers":[93],"self-supervised":[95],"representation":[97],"models.":[98],"Experiments":[99],"MEAD":[102],"dataset":[103],"demonstrate":[104],"FCConDubber":[106],"significantly":[107],"outperforms":[108],"baseline":[109],"models":[110],"synthesis":[113],"quality":[114],"prosody":[116],"reconstruction.":[117],"The":[118],"synthesized":[119],"samples":[120],"are":[121],"available":[122],"at":[123],"https://fccondubber.github.io/FCConDubber/.":[124]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
