{"id":"https://openalex.org/W7161701786","doi":"https://doi.org/10.48550/arxiv.2605.16918","title":"HighSync: High-Quality Lip Synchronization via Latent Diffusion Models","display_name":"HighSync: High-Quality Lip Synchronization via Latent Diffusion Models","publication_year":2026,"publication_date":"2026-05-16","ids":{"openalex":"https://openalex.org/W7161701786","doi":"https://doi.org/10.48550/arxiv.2605.16918"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.16918","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.16918","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.16918","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136478659","display_name":"Saeed Firouzi Daghigh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daghigh, Saeed Firouzi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136491245","display_name":"Majid Iranpour Mobarekeh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mobarekeh, Majid Iranpour","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136487346","display_name":"Mostafa Alavi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alavi, Mostafa","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136483709","display_name":"Mehdi Bagheri","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bagheri, Mehdi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9222999811172485,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9222999811172485,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.03139999881386757,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10741","display_name":"Video Coding and Compression Technologies","score":0.006200000178068876,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sync","display_name":"sync","score":0.7771999835968018},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.7353000044822693},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5095000267028809},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.44269999861717224},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4056999981403351},{"id":"https://openalex.org/keywords/image-quality","display_name":"Image quality","score":0.30660000443458557}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7800999879837036},{"id":"https://openalex.org/C3913047","wikidata":"https://www.wikidata.org/wiki/Q1956265","display_name":"sync","level":3,"score":0.7771999835968018},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.7353000044822693},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6046000123023987},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5095000267028809},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49390000104904175},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.44269999861717224},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4056999981403351},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3637000024318695},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.34380000829696655},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.30660000443458557},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2883000075817108},{"id":"https://openalex.org/C150817343","wikidata":"https://www.wikidata.org/wiki/Q875932","display_name":"Digital watermarking","level":3,"score":0.2759000062942505},{"id":"https://openalex.org/C108734733","wikidata":"https://www.wikidata.org/wiki/Q1172333","display_name":"Data synchronization","level":3,"score":0.2635999917984009},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.2621999979019165},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C3020001037","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assessment","level":3,"score":0.25780001282691956},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.25380000472068787}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.16918","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.16918","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.16918","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.16918","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"present":[1],"HighSync,":[2],"an":[3],"end-to-end":[4],"diffusion-based":[5],"framework":[6],"for":[7,69],"high-fidelity":[8],"lip":[9,40,54],"synchronization":[10,30,122],"that":[11,95,126],"generates":[12],"photorealistic":[13],"talking-face":[14],"videos":[15],"aligned":[16],"with":[17,29],"arbitrary":[18],"input":[19],"audio.":[20],"Existing":[21],"approaches":[22],"consistently":[23],"struggle":[24],"to":[25,48,57,81],"reconcile":[26],"image":[27],"quality":[28,120],"accuracy,":[31],"producing":[32],"either":[33],"visually":[34],"degraded":[35],"outputs":[36],"or":[37],"temporally":[38],"inconsistent":[39],"movements.":[41],"HighSync":[42,127],"addresses":[43],"both":[44,118,132],"challenges":[45],"simultaneously":[46],"and,":[47],"our":[49,82],"knowledge,":[50],"is":[51,84],"the":[52,75,85,112],"first":[53],"sync":[55],"model":[56],"operate":[58],"natively":[59],"at":[60],"512*512":[61],"resolution,":[62],"positioning":[63],"it":[64],"as":[65,74],"a":[66,91,108],"viable":[67],"solution":[68],"professional":[70],"production":[71],"environments":[72],"such":[73],"film":[76],"and":[77,87,121,138],"broadcast":[78],"industries.":[79],"Central":[80],"approach":[83],"identification":[86],"systematic":[88],"elimination":[89],"of":[90],"data":[92],"leakage":[93],"phenomenon":[94],"has":[96],"silently":[97],"undermined":[98],"temporal":[99],"modeling":[100],"in":[101],"prior":[102],"work,":[103],"preventing":[104],"models":[105],"from":[106],"developing":[107],"genuine":[109],"dependence":[110],"on":[111,131],"audio":[113],"signal.":[114],"Comprehensive":[115],"evaluations":[116],"across":[117],"perceptual":[119],"accuracy":[123],"metrics":[124],"confirm":[125],"achieves":[128],"state-of-the-art":[129],"performance":[130],"fronts.":[133],"Source":[134],"code,":[135],"pre-trained":[136],"models,":[137],"supplementary":[139],"video":[140],"results":[141],"are":[142],"publicly":[143],"available":[144],"at:":[145],"https://github.com/saeed5959/high_sync":[146]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-20T00:00:00"}
