{"id":"https://openalex.org/W4415709064","doi":"https://doi.org/10.1109/icme59968.2025.11209535","title":"UniSync: A Unified Framework for Audio-Visual Synchronization","display_name":"UniSync: A Unified Framework for Audio-Visual Synchronization","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415709064","doi":"https://doi.org/10.1109/icme59968.2025.11209535"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11209535","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209535","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058581622","display_name":"Tao Feng","orcid":"https://orcid.org/0000-0002-5759-3164"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tao Feng","raw_affiliation_strings":["Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056844117","display_name":"Yifan Xie","orcid":"https://orcid.org/0009-0002-3225-9220"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifan Xie","raw_affiliation_strings":["Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xun Guan","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xun Guan","raw_affiliation_strings":["Tsinghua University,Shenzhen International Graduate School,Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University,Shenzhen International Graduate School,Shenzhen,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiyuan Song","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiyuan Song","raw_affiliation_strings":["Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhou Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhou Liu","raw_affiliation_strings":["Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100608217","display_name":"Fei Ma","orcid":"https://orcid.org/0000-0002-3911-7121"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Ma","raw_affiliation_strings":["Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076311774","display_name":"Fei Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Yu","raw_affiliation_strings":["Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Guangdong Laboratory of Artificial Intelligence and Digital Economy (SZ),Shenzhen,China","institution_ids":["https://openalex.org/I4210136793"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5058581622"],"corresponding_institution_ids":["https://openalex.org/I4210136793"],"apc_list":null,"apc_paid":null,"fwci":1.1236,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.82784105,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.6050000190734863,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.6050000190734863,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.1598999947309494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12032","display_name":"Multisensory perception and integration","score":0.046300001442432404,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.696399986743927},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.6247000098228455},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.5702000260353088},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5098000168800354},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.4659999907016754},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.37560001015663147},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.37450000643730164},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.3653999865055084}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7940000295639038},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.696399986743927},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.6247000098228455},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5712000131607056},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.5702000260353088},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5098000168800354},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.4659999907016754},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41679999232292175},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.37560001015663147},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.37450000643730164},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3653999865055084},{"id":"https://openalex.org/C108734733","wikidata":"https://www.wikidata.org/wiki/Q1172333","display_name":"Data synchronization","level":3,"score":0.35839998722076416},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.35510000586509705},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.31619998812675476},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2849999964237213},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2671000063419342},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2621000111103058},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.25429999828338623}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11209535","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209535","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W34110307","https://openalex.org/W1569907127","https://openalex.org/W1974357398","https://openalex.org/W2077521262","https://openalex.org/W2082942761","https://openalex.org/W2122591539","https://openalex.org/W2289286917","https://openalex.org/W2295661697","https://openalex.org/W2604379605","https://openalex.org/W2741151796","https://openalex.org/W2890952074","https://openalex.org/W2950864153","https://openalex.org/W2981767644","https://openalex.org/W3081492798","https://openalex.org/W3142098745","https://openalex.org/W3186090335","https://openalex.org/W4200371243","https://openalex.org/W4235964886","https://openalex.org/W4297841713","https://openalex.org/W4313023760","https://openalex.org/W4375869161","https://openalex.org/W4403791312","https://openalex.org/W4409366558"],"related_works":[],"abstract_inverted_index":{"Precise":[0],"audio-visual":[1,38,64,129],"synchronization":[2,65,139],"in":[3,20,48,141],"speech":[4],"videos":[5],"is":[6],"crucial":[7],"for":[8,62],"content":[9],"quality":[10,140],"and":[11,27,40,81,110,124,144],"viewer":[12],"comprehension.":[13],"Existing":[14],"methods":[15,33,120],"have":[16],"made":[17],"significant":[18,96],"strides":[19],"addressing":[21],"this":[22],"challenge":[23],"through":[24],"rule-based":[25],"approaches":[26],"end-to-end":[28],"learning":[29,42,103],"techniques.":[30],"However,":[31],"these":[32,54],"often":[34],"rely":[35],"on":[36,121],"limited":[37],"representations":[39,76,83],"suboptimal":[41],"strategies,":[43],"potentially":[44],"constraining":[45],"their":[46,95],"effectiveness":[47],"more":[49],"complex":[50],"scenarios.":[51],"To":[52],"address":[53],"limitations,":[55],"we":[56],"present":[57],"UniSync,":[58],"a":[59,106],"novel":[60],"approach":[61],"evaluating":[63],"using":[66],"embedding":[67],"similarities.":[68],"UniSync":[69,117],"offers":[70],"broad":[71],"compatibility":[72],"with":[73,105],"various":[74],"audio":[75],"(e.g.,":[77,84],"Mel":[78],"spectrograms,":[79],"HuBERT)":[80],"visual":[82],"RGB":[85],"images,":[86],"face":[87,135],"parsing":[88],"maps,":[89],"facial":[90],"landmarks,":[91],"3DMM),":[92],"effectively":[93],"handling":[94],"dimensional":[97],"differences.":[98],"We":[99],"enhance":[100],"the":[101],"contrastive":[102],"framework":[104],"margin-based":[107],"loss":[108],"component":[109],"cross-speaker":[111],"unsynchronized":[112],"pairs,":[113],"improving":[114],"discriminative":[115],"capabilities.":[116],"outperforms":[118],"existing":[119],"standard":[122],"datasets":[123],"demonstrates":[125],"versatility":[126],"across":[127],"diverse":[128],"representations.":[130],"Its":[131],"integration":[132],"into":[133],"talking":[134],"generation":[136],"frameworks":[137],"enhances":[138],"both":[142],"natural":[143],"AI-generated":[145],"content.":[146]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-28T09:10:13.091523","created_date":"2025-10-30T00:00:00"}
