{"id":"https://openalex.org/W4366201642","doi":"https://doi.org/10.1109/access.2023.3267668","title":"Efficient Audiovisual Fusion for Active Speaker Detection","display_name":"Efficient Audiovisual Fusion for Active Speaker Detection","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4366201642","doi":"https://doi.org/10.1109/access.2023.3267668"},"language":"en","primary_location":{"id":"doi:10.1109/access.2023.3267668","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3267668","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10103538.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10103538.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090074861","display_name":"Fiseha B. Tesema","orcid":"https://orcid.org/0000-0002-7472-4007"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Fiseha B. Tesema","raw_affiliation_strings":["Interdisciplinary Innovation Research Institute, Zhejiang Lab, Zhongtai, Yuhang, Hangzhou, China","Interdisciplinary Innovation Research Institute, Zhejiang Lab, Kechuang Avenue, Zhongtai Sub-District, Yuhang District, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-7472-4007","affiliations":[{"raw_affiliation_string":"Interdisciplinary Innovation Research Institute, Zhejiang Lab, Zhongtai, Yuhang, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]},{"raw_affiliation_string":"Interdisciplinary Innovation Research Institute, Zhejiang Lab, Kechuang Avenue, Zhongtai Sub-District, Yuhang District, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010340252","display_name":"Jason Gu","orcid":"https://orcid.org/0000-0002-7626-1077"},"institutions":[{"id":"https://openalex.org/I129902397","display_name":"Dalhousie University","ror":"https://ror.org/01e6qks80","country_code":"CA","type":"education","lineage":["https://openalex.org/I129902397"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jason Gu","raw_affiliation_strings":["Electrical and Computer Engineering, Dalhousie University, Halifax, NS, Canada"],"raw_orcid":"https://orcid.org/0000-0002-7626-1077","affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, Dalhousie University, Halifax, NS, Canada","institution_ids":["https://openalex.org/I129902397"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074371475","display_name":"Wei Song","orcid":"https://orcid.org/0000-0002-0828-7486"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Song","raw_affiliation_strings":["Interdisciplinary Innovation Research Institute, Zhejiang Lab, Zhongtai, Yuhang, Hangzhou, China","Interdisciplinary Innovation Research Institute, Zhejiang Lab, Kechuang Avenue, Zhongtai Sub-District, Yuhang District, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-0828-7486","affiliations":[{"raw_affiliation_string":"Interdisciplinary Innovation Research Institute, Zhejiang Lab, Zhongtai, Yuhang, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]},{"raw_affiliation_string":"Interdisciplinary Innovation Research Institute, Zhejiang Lab, Kechuang Avenue, Zhongtai Sub-District, Yuhang District, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037904160","display_name":"Hong Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong Wu","raw_affiliation_strings":["School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039356472","display_name":"Shiqiang Zhu","orcid":"https://orcid.org/0000-0002-5687-4001"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiqiang Zhu","raw_affiliation_strings":["Interdisciplinary Innovation Research Institute, Zhejiang Lab, Zhongtai, Yuhang, Hangzhou, China","Interdisciplinary Innovation Research Institute, Zhejiang Lab, Kechuang Avenue, Zhongtai Sub-District, Yuhang District, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Interdisciplinary Innovation Research Institute, Zhejiang Lab, Zhongtai, Yuhang, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]},{"raw_affiliation_string":"Interdisciplinary Innovation Research Institute, Zhejiang Lab, Kechuang Avenue, Zhongtai Sub-District, Yuhang District, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028966460","display_name":"Zheyuan Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheyuan Lin","raw_affiliation_strings":["Interdisciplinary Innovation Research Institute, Zhejiang Lab, Zhongtai, Yuhang, Hangzhou, China","Interdisciplinary Innovation Research Institute, Zhejiang Lab, Kechuang Avenue, Zhongtai Sub-District, Yuhang District, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Interdisciplinary Innovation Research Institute, Zhejiang Lab, Zhongtai, Yuhang, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]},{"raw_affiliation_string":"Interdisciplinary Innovation Research Institute, Zhejiang Lab, Kechuang Avenue, Zhongtai Sub-District, Yuhang District, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5090074861"],"corresponding_institution_ids":["https://openalex.org/I4210123185"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.5708,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.61653343,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"11","issue":null,"first_page":"45140","last_page":"45153"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7426249980926514},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6218764185905457},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5616618990898132},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.476388156414032},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3626464605331421},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.06585708260536194}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7426249980926514},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6218764185905457},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5616618990898132},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.476388156414032},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3626464605331421},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.06585708260536194},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2023.3267668","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3267668","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10103538.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:c2e7e61d0af441a093fafac5a366aba4","is_oa":true,"landing_page_url":"https://doaj.org/article/c2e7e61d0af441a093fafac5a366aba4","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 11, Pp 45140-45153 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2023.3267668","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3267668","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10103538.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6700000166893005,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G492650815","display_name":null,"funder_award_id":"U21A20488","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8340029140","display_name":null,"funder_award_id":"ID 2019R51010","funder_id":"https://openalex.org/F4320329747","funder_display_name":"Zhejiang Provincial Ten Thousand Plan for Young Top Talents"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329747","display_name":"Zhejiang Provincial Ten Thousand Plan for Young Top Talents","ror":null},{"id":"https://openalex.org/F4320336605","display_name":"National Ten Thousand Talent Program","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4366201642.pdf","grobid_xml":"https://content.openalex.org/works/W4366201642.grobid-xml"},"referenced_works_count":90,"referenced_works":["https://openalex.org/W64459544","https://openalex.org/W1522301498","https://openalex.org/W1836465849","https://openalex.org/W1872883209","https://openalex.org/W1980528705","https://openalex.org/W2015293542","https://openalex.org/W2033966334","https://openalex.org/W2054852564","https://openalex.org/W2078813998","https://openalex.org/W2081074144","https://openalex.org/W2090088747","https://openalex.org/W2100561338","https://openalex.org/W2104804886","https://openalex.org/W2106488367","https://openalex.org/W2118847468","https://openalex.org/W2138621090","https://openalex.org/W2138761194","https://openalex.org/W2140831887","https://openalex.org/W2159922428","https://openalex.org/W2163973301","https://openalex.org/W2168996682","https://openalex.org/W2169165592","https://openalex.org/W2287407690","https://openalex.org/W2302255633","https://openalex.org/W2330149154","https://openalex.org/W2398196429","https://openalex.org/W2400967734","https://openalex.org/W2413794162","https://openalex.org/W2547701628","https://openalex.org/W2550143307","https://openalex.org/W2597655663","https://openalex.org/W2604379605","https://openalex.org/W2610846088","https://openalex.org/W2612675303","https://openalex.org/W2619383789","https://openalex.org/W2621109248","https://openalex.org/W2673722796","https://openalex.org/W2726515241","https://openalex.org/W2759799350","https://openalex.org/W2768817490","https://openalex.org/W2792811594","https://openalex.org/W2806563680","https://openalex.org/W2808631503","https://openalex.org/W2889555942","https://openalex.org/W2892350043","https://openalex.org/W2916104401","https://openalex.org/W2950864153","https://openalex.org/W2963173190","https://openalex.org/W2963470929","https://openalex.org/W2963528589","https://openalex.org/W2963738886","https://openalex.org/W2964052309","https://openalex.org/W2964171275","https://openalex.org/W2979750740","https://openalex.org/W2981632379","https://openalex.org/W3001079424","https://openalex.org/W3003903817","https://openalex.org/W3007075806","https://openalex.org/W3015222335","https://openalex.org/W3016098309","https://openalex.org/W3034702511","https://openalex.org/W3035875334","https://openalex.org/W3087845454","https://openalex.org/W3104119587","https://openalex.org/W3148848505","https://openalex.org/W3172472082","https://openalex.org/W3189964604","https://openalex.org/W3206008172","https://openalex.org/W3207207922","https://openalex.org/W3211423998","https://openalex.org/W4214701094","https://openalex.org/W4286902976","https://openalex.org/W4289665794","https://openalex.org/W4304808241","https://openalex.org/W4312466984","https://openalex.org/W4385245566","https://openalex.org/W6631190155","https://openalex.org/W6638667902","https://openalex.org/W6675048546","https://openalex.org/W6675974536","https://openalex.org/W6684648996","https://openalex.org/W6698183232","https://openalex.org/W6712911086","https://openalex.org/W6735377749","https://openalex.org/W6735927292","https://openalex.org/W6737479944","https://openalex.org/W6737896281","https://openalex.org/W6738390143","https://openalex.org/W6739901393","https://openalex.org/W6802457966"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W2132659060","https://openalex.org/W2031992971","https://openalex.org/W3214791684","https://openalex.org/W2152662039"],"abstract_inverted_index":{"Active":[0],"speaker":[1,242],"detection":[2,124,243,264],"(ASD)":[3],"refers":[4],"to":[5,36,57,193,267],"detecting":[6],"the":[7,26,54,71,74,81,93,117,140,151,159,191,195,203,217,223,234,238,245,258,282,299,305],"speaking":[8],"person":[9],"among":[10],"visible":[11],"human":[12],"instances":[13],"in":[14,119],"a":[15,21,30,88,175,210,229],"video.":[16],"Existing":[17],"methods":[18],"widely":[19],"employed":[20,271],"similar":[22],"audiovisual":[23,131,241],"fusion":[24,31,108,113,132,221],"approach,":[25,222],"concatenation.":[27],"Although":[28],"such":[29,182],"approach":[32],"is":[33,68],"often":[34],"argued":[35],"help":[37],"enhance":[38],"performance,":[39],"it":[40,86],"must":[41],"be":[42],"noted":[43],"that":[44,69,78,103,138,254,270],"neither":[45],"feature":[46,76,136,220],"modalities":[47],"play":[48],"an":[49,111,129],"equal":[50],"role.":[51],"It":[52],"forces":[53],"backend":[55,94],"network":[56,118,196,205],"focus":[58,167],"on":[59,150,169,190,233,281],"learning":[60,120],"intramodal":[61],"rather":[62],"than":[63,304],"intermodal":[64],"features.":[65,162],"Another":[66],"concern":[67],"since":[70],"concatenation":[72,306],"doubles":[73],"fused":[75],"dimension":[77],"feeds":[79],"from":[80],"audio":[82,161],"and":[83,122,145,155,261,295,302],"video":[84],"module,":[85],"creates":[87],"higher":[89],"computational":[90,177],"overhead":[91,178],"for":[92],"network.":[95],"To":[96],"address":[97],"these":[98],"problems,":[99],"this":[100,249],"work":[101,127,208,250],"hypothesizes":[102],"instead":[104],"of":[105,231,237],"leveraging":[106],"deterministic":[107],"operation,":[109],"employing":[110],"efficient":[112,130],"technique":[114],"may":[115],"assist":[116],"efficiently":[121],"improve":[123],"accuracy.":[125],"This":[126,207],"proposes":[128],"(AVF)":[133],"with":[134,158],"fewer":[135],"dimensions":[137],"captures":[139],"correlations":[141],"between":[142],"facial":[143,153],"regions":[144],"sound":[146],"signals,":[147],"focusing":[148],"more":[149],"discriminative":[152],"features":[154],"associating":[156],"them":[157],"corresponding":[160],"Furthermore,":[163],"previous":[164,252],"ASD":[165,171,215],"works":[166,253,269],"only":[168],"improving":[170],"performance":[172],"by":[173],"creating":[174],"large":[176],"using":[179,216],"complex":[180],"techniques":[181,189],"as":[183],"adding":[184],"sophisticated":[185],"postprocessing,":[186],"applying":[187],"smoothing":[188],"classifier":[192],"refine":[194],"outputs":[197],"at":[198],"multiple":[199,204],"stages,":[200],"or":[201],"assembling":[202],"outputs.":[206],"proposed":[209,219,226,276],"simple":[211],"yet":[212],"effective":[213],"end-to-end":[214],"newly":[218],"AVF.":[224],"The":[225,275,287],"framework":[227],"attained":[228,262],"mAP":[230],"84.384%":[232],"validation":[235],"set":[236],"most":[239],"challenging":[240],"benchmark,":[244],"AVA-ActiveSpeaker.":[246],"With":[247],"this,":[248],"outperformed":[251],"did":[255],"not":[256],"apply":[257],"postprocessing":[259,273],"tasks":[260],"competitive":[263],"accuracy":[265],"compared":[266],"other":[268],"different":[272,291],"tasks.":[274],"model":[277],"also":[278],"learns":[279],"better":[280],"unsynchronized":[283],"raw":[284],"AVA-ActiveSpeaker":[285],"dataset.":[286],"ablation":[288],"experiments":[289],"under":[290],"image":[292],"scale":[293],"settings":[294],"noisy":[296],"signals":[297],"show":[298],"AFV\u2019s":[300],"effectiveness":[301],"robustness":[303],"operation.":[307]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
