{"id":"https://openalex.org/W7161165172","doi":"https://doi.org/10.1109/dcc66757.2026.00020","title":"Audio-Visual Cross-Modal Compression for Generative Face Video Coding","display_name":"Audio-Visual Cross-Modal Compression for Generative Face Video Coding","publication_year":2026,"publication_date":"2026-03-24","ids":{"openalex":"https://openalex.org/W7161165172","doi":"https://doi.org/10.1109/dcc66757.2026.00020"},"language":null,"primary_location":{"id":"doi:10.1109/dcc66757.2026.00020","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dcc66757.2026.00020","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 Data Compression Conference (DCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038834628","display_name":"Youmin Xu","orcid":"https://orcid.org/0000-0003-2510-3850"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Youmin Xu","raw_affiliation_strings":["School of Electronic and Computer Engineering, Peking University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electronic and Computer Engineering, Peking University,China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101980608","display_name":"Mengxi Guo","orcid":"https://orcid.org/0009-0007-9490-6661"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mengxi Guo","raw_affiliation_strings":["Bytedance Inc.,Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bytedance Inc.,Shenzhen,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136169624","display_name":"Shijie Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shijie Zhao","raw_affiliation_strings":["Bytedance Inc.,Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bytedance Inc.,Shenzhen,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083561810","display_name":"Weiqi Li","orcid":"https://orcid.org/0009-0000-0057-5486"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiqi Li","raw_affiliation_strings":["School of Electronic and Computer Engineering, Peking University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electronic and Computer Engineering, Peking University,China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136174005","display_name":"Junlin Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Junlin Li","raw_affiliation_strings":["Bytedance Inc.,Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bytedance Inc.,Shenzhen,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136166123","display_name":"Li Xin Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li Zhang","raw_affiliation_strings":["Bytedance Inc.,Shenzhen,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bytedance Inc.,Shenzhen,China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5136106429","display_name":"Jian Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Zhang","raw_affiliation_strings":["School of Electronic and Computer Engineering, Peking University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electronic and Computer Engineering, Peking University,China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5038834628"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.95764783,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"123","last_page":"132"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.6080999970436096,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.6080999970436096,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.21649999916553497,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.045499999076128006,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.5315999984741211},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.40689998865127563},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.3709000051021576},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.3650999963283539},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.3093000054359436},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.30660000443458557}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6486999988555908},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6330999732017517},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5575000047683716},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.5315999984741211},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.40689998865127563},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.3709000051021576},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.3650999963283539},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3093000054359436},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.30660000443458557},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.2718000113964081},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.26579999923706055},{"id":"https://openalex.org/C57654395","wikidata":"https://www.wikidata.org/wiki/Q1097775","display_name":"Compression artifact","level":5,"score":0.2612000107765198},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2563000023365021},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.25450000166893005},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.25360000133514404}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dcc66757.2026.00020","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dcc66757.2026.00020","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 Data Compression Conference (DCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Generative":[0],"face":[1],"video":[2,11,19,64,72],"coding":[3],"(GFVC)":[4],"is":[5],"vital":[6],"for":[7,46,132],"modern":[8],"applications":[9],"like":[10],"conferencing,":[12],"yet":[13],"existing":[14],"methods":[15],"primarily":[16],"focus":[17],"on":[18],"motion":[20,69],"while":[21],"neglecting":[22],"the":[23,30,108,116,130],"significant":[24],"bitrate":[25],"contribution":[26],"of":[27,90],"audio.":[28],"Despite":[29],"well-established":[31],"correlation":[32],"between":[33],"audio":[34,62,75],"and":[35,63,73,122],"lip":[36],"movements,":[37],"this":[38],"cross-modal":[39],"coherence":[40],"has":[41],"not":[42],"been":[43],"systematically":[44],"exploited":[45],"compression.":[47],"To":[48],"address":[49],"this,":[50],"we":[51],"propose":[52],"an":[53],"Audio-Visual":[54],"Cross-Modal":[55],"Compression":[56],"(AVCC)":[57],"framework":[58,67],"that":[59,112],"jointly":[60],"compresses":[61],"streams.":[65],"Our":[66],"extracts":[68],"information":[70],"from":[71,93,107],"tokenizes":[74],"features,":[76],"then":[77],"aligns":[78],"them":[79],"through":[80],"a":[81,94],"unified":[82],"audio-video":[83],"diffusion":[84],"process.":[85],"This":[86],"allows":[87],"synchronized":[88],"reconstruction":[89],"both":[91],"modalities":[92],"shared":[95],"representation.":[96],"In":[97],"extremely":[98],"low-rate":[99],"scenarios,":[100],"AVCC":[101,113],"can":[102],"even":[103],"reconstruct":[104],"one":[105],"modality":[106],"other.":[109],"Experiments":[110],"show":[111],"significantly":[114],"outperforms":[115],"Versatile":[117],"Video":[118],"Coding":[119],"(VVC)":[120],"standard":[121],"state-of-the-art":[123],"GFVC":[124],"schemes":[125],"in":[126],"rate-distortion":[127],"performance,":[128],"paving":[129],"way":[131],"more":[133],"efficient":[134],"multimodal":[135],"communication":[136],"systems.":[137]},"counts_by_year":[],"updated_date":"2026-05-16T06:04:12.930555","created_date":"2026-05-15T00:00:00"}
