{"id":"https://openalex.org/W4386597431","doi":"https://doi.org/10.1109/icip49359.2023.10222345","title":"ASVFI: Audio-Driven Speaker Video Frame Interpolation","display_name":"ASVFI: Audio-Driven Speaker Video Frame Interpolation","publication_year":2023,"publication_date":"2023-09-11","ids":{"openalex":"https://openalex.org/W4386597431","doi":"https://doi.org/10.1109/icip49359.2023.10222345"},"language":"en","primary_location":{"id":"doi:10.1109/icip49359.2023.10222345","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icip49359.2023.10222345","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Image Processing (ICIP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074773696","display_name":"Qianrui Wang","orcid":"https://orcid.org/0000-0001-9058-0253"},"institutions":[{"id":"https://openalex.org/I31590910","display_name":"Jianghan University","ror":"https://ror.org/041c9x778","country_code":"CN","type":"education","lineage":["https://openalex.org/I31590910"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qianrui Wang","raw_affiliation_strings":["Jianghan University,School of Artificial Intelligence,China","School of Artificial Intelligence, Jianghan University, China"],"affiliations":[{"raw_affiliation_string":"Jianghan University,School of Artificial Intelligence,China","institution_ids":["https://openalex.org/I31590910"]},{"raw_affiliation_string":"School of Artificial Intelligence, Jianghan University, China","institution_ids":["https://openalex.org/I31590910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055698842","display_name":"Dengshi Li","orcid":"https://orcid.org/0000-0002-3349-8664"},"institutions":[{"id":"https://openalex.org/I31590910","display_name":"Jianghan University","ror":"https://ror.org/041c9x778","country_code":"CN","type":"education","lineage":["https://openalex.org/I31590910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dengshi Li","raw_affiliation_strings":["Jianghan University,School of Artificial Intelligence,China","School of Artificial Intelligence, Jianghan University, China"],"affiliations":[{"raw_affiliation_string":"Jianghan University,School of Artificial Intelligence,China","institution_ids":["https://openalex.org/I31590910"]},{"raw_affiliation_string":"School of Artificial Intelligence, Jianghan University, China","institution_ids":["https://openalex.org/I31590910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080825791","display_name":"Liang Liao","orcid":"https://orcid.org/0000-0002-2238-2420"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Liang Liao","raw_affiliation_strings":["Nanyang Technological University,School of Computer Science and Engineering,Singapore","School of Computer Science and Engineering, Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University,School of Computer Science and Engineering,Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015266516","display_name":"Hao Song","orcid":"https://orcid.org/0000-0002-1554-3759"},"institutions":[{"id":"https://openalex.org/I31590910","display_name":"Jianghan University","ror":"https://ror.org/041c9x778","country_code":"CN","type":"education","lineage":["https://openalex.org/I31590910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Song","raw_affiliation_strings":["Jianghan University,School of Artificial Intelligence,China","School of Artificial Intelligence, Jianghan University, China"],"affiliations":[{"raw_affiliation_string":"Jianghan University,School of Artificial Intelligence,China","institution_ids":["https://openalex.org/I31590910"]},{"raw_affiliation_string":"School of Artificial Intelligence, Jianghan University, China","institution_ids":["https://openalex.org/I31590910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100454540","display_name":"Wei Li","orcid":"https://orcid.org/0000-0001-7231-1586"},"institutions":[{"id":"https://openalex.org/I31590910","display_name":"Jianghan University","ror":"https://ror.org/041c9x778","country_code":"CN","type":"education","lineage":["https://openalex.org/I31590910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Li","raw_affiliation_strings":["Jianghan University,School of Artificial Intelligence,China","School of Artificial Intelligence, Jianghan University, China"],"affiliations":[{"raw_affiliation_string":"Jianghan University,School of Artificial Intelligence,China","institution_ids":["https://openalex.org/I31590910"]},{"raw_affiliation_string":"School of Artificial Intelligence, Jianghan University, China","institution_ids":["https://openalex.org/I31590910"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101623807","display_name":"Jing Xiao","orcid":"https://orcid.org/0000-0002-0833-5679"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Xiao","raw_affiliation_strings":["Wuhan University,National Engineering Research Center for Multimedia Software,China","National Engineering Research Center for Multimedia Software, Wuhan University, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University,National Engineering Research Center for Multimedia Software,China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"National Engineering Research Center for Multimedia Software, Wuhan University, China","institution_ids":["https://openalex.org/I37461747"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5074773696"],"corresponding_institution_ids":["https://openalex.org/I31590910"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11178435,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3200","last_page":"3204"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/motion-interpolation","display_name":"Motion interpolation","score":0.7928675413131714},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7673513889312744},{"id":"https://openalex.org/keywords/interpolation","display_name":"Interpolation (computer graphics)","score":0.6261616945266724},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5955260992050171},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5801399946212769},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5453616380691528},{"id":"https://openalex.org/keywords/video-compression-picture-types","display_name":"Video compression picture types","score":0.4676821827888489},{"id":"https://openalex.org/keywords/block-matching-algorithm","display_name":"Block-matching algorithm","score":0.45417237281799316},{"id":"https://openalex.org/keywords/frame-rate","display_name":"Frame rate","score":0.44360679388046265},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3566398620605469},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.3173182010650635},{"id":"https://openalex.org/keywords/video-processing","display_name":"Video processing","score":0.2947154641151428},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.247505784034729},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.07137778401374817}],"concepts":[{"id":"https://openalex.org/C72560505","wikidata":"https://www.wikidata.org/wiki/Q204510","display_name":"Motion interpolation","level":5,"score":0.7928675413131714},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7673513889312744},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.6261616945266724},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5955260992050171},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5801399946212769},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5453616380691528},{"id":"https://openalex.org/C106030495","wikidata":"https://www.wikidata.org/wiki/Q1797012","display_name":"Video compression picture types","level":4,"score":0.4676821827888489},{"id":"https://openalex.org/C167510206","wikidata":"https://www.wikidata.org/wiki/Q2835824","display_name":"Block-matching algorithm","level":4,"score":0.45417237281799316},{"id":"https://openalex.org/C3261483","wikidata":"https://www.wikidata.org/wiki/Q119565","display_name":"Frame rate","level":2,"score":0.44360679388046265},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3566398620605469},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.3173182010650635},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.2947154641151428},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.247505784034729},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.07137778401374817}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icip49359.2023.10222345","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icip49359.2023.10222345","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Image Processing (ICIP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2051562356","display_name":null,"funder_award_id":"060101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2870525900","display_name":null,"funder_award_id":"Wuhan","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5302744849","display_name":null,"funder_award_id":"U22A2035","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5379492266","display_name":null,"funder_award_id":"61701194","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5939423041","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6058138561","display_name":null,"funder_award_id":", No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6163091765","display_name":null,"funder_award_id":"2020010","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6291367666","display_name":null,"funder_award_id":"2020010601012288","funder_id":"https://openalex.org/F4320336636","funder_display_name":"Wuhan Science and Technology Project"},{"id":"https://openalex.org/G8650790963","display_name":null,"funder_award_id":"2019029","funder_id":"https://openalex.org/F4320326959","funder_display_name":"Jianghan University"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320326959","display_name":"Jianghan University","ror":"https://ror.org/041c9x778"},{"id":"https://openalex.org/F4320330944","display_name":"Nature","ror":null},{"id":"https://openalex.org/F4320336636","display_name":"Wuhan Science and Technology Project","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2087681821","https://openalex.org/W2133665775","https://openalex.org/W2526050071","https://openalex.org/W2604329646","https://openalex.org/W2808631503","https://openalex.org/W2949258649","https://openalex.org/W2997150500","https://openalex.org/W3015761874","https://openalex.org/W3035239272","https://openalex.org/W3093287838","https://openalex.org/W3110612869","https://openalex.org/W3197199219","https://openalex.org/W3197845860","https://openalex.org/W3203041407","https://openalex.org/W3207505152","https://openalex.org/W4304014328","https://openalex.org/W4304099183","https://openalex.org/W4312623579","https://openalex.org/W4313306253","https://openalex.org/W4319301042","https://openalex.org/W4322706707"],"related_works":["https://openalex.org/W1574724839","https://openalex.org/W2094004415","https://openalex.org/W2187171999","https://openalex.org/W2118574600","https://openalex.org/W2125918940","https://openalex.org/W2032827369","https://openalex.org/W2946000660","https://openalex.org/W2140582509","https://openalex.org/W2096736219","https://openalex.org/W2000053753"],"abstract_inverted_index":{"Due":[0],"to":[1,30,103],"limited":[2],"data":[3],"transmission,":[4],"the":[5,12,24,32,46,51,69,91,105,111,121,125,133,140,150,156],"video":[6,33,39,106,114],"frame":[7,20,34,40,123],"rate":[8],"is":[9,54,80,135,152],"low":[10],"during":[11],"online":[13],"conference,":[14],"severely":[15],"affecting":[16],"user":[17],"experience.":[18],"Video":[19,85,100],"interpolation":[21,41],"can":[22,59],"solve":[23],"problem":[25],"by":[26,116],"interpolating":[27,143,147],"intermediate":[28,63,122],"frames":[29,64],"increase":[31],"rate.":[35],"Generally,":[36],"most":[37],"existing":[38],"methods":[42,58],"are":[43],"based":[44],"on":[45],"linear":[47],"motion":[48,53],"assumption.":[49],"However,":[50],"mouth":[52,73],"nonlinear,":[55],"and":[56,75,113,118],"these":[57],"not":[60],"generate":[61],"superior":[62],"in":[65,124],"speaker":[66],"video.":[67],"Considering":[68],"strong":[70],"correlation":[71],"between":[72],"shape":[74],"vocalization,":[76],"a":[77],"new":[78],"method":[79],"proposed,":[81],"named":[82],"Audio-driven":[83],"Speaker":[84],"Frame":[86],"Interpolation(ASVFI).":[87],"First,":[88],"we":[89,98,109],"extract":[90,104],"audio":[92,112],"feature":[93],"from":[94],"Audio":[95],"Net(ANet).":[96],"Second,":[97],"use":[99],"Net(VNet)":[101],"encoder":[102],"feature.":[107],"Finally,":[108],"fuse":[110],"features":[115],"AVFusion":[117],"decode":[119],"out":[120],"VNet":[126],"decoder.":[127],"The":[128],"experimental":[129],"results":[130],"show":[131],"that":[132],"PSNR":[134,151],"nearly":[136],"0.13dB":[137],"higher":[138,154],"than":[139,155],"baseline":[141],"of":[142],"one":[144],"frame.":[145],"When":[146],"seven":[148],"frames,":[149],"0.33dB":[153],"baseline.":[157]},"counts_by_year":[],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
