{"id":"https://openalex.org/W4389913714","doi":"https://doi.org/10.3390/s23249845","title":"Audio\u2013Visual Fusion Based on Interactive Attention for Person Verification","display_name":"Audio\u2013Visual Fusion Based on Interactive Attention for Person Verification","publication_year":2023,"publication_date":"2023-12-15","ids":{"openalex":"https://openalex.org/W4389913714","doi":"https://doi.org/10.3390/s23249845","pmid":"https://pubmed.ncbi.nlm.nih.gov/38139689"},"language":"en","primary_location":{"id":"doi:10.3390/s23249845","is_oa":true,"landing_page_url":"https://doi.org/10.3390/s23249845","pdf_url":"https://www.mdpi.com/1424-8220/23/24/9845/pdf?version=1702645976","source":{"id":"https://openalex.org/S101949793","display_name":"Sensors","issn_l":"1424-8220","issn":["1424-8220"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1424-8220/23/24/9845/pdf?version=1702645976","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111298718","display_name":"Xuebin Jing","orcid":null},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuebin Jing","raw_affiliation_strings":["School of Computer Science and Technology, Xinjiang University, Urumqi 830017, China","Xinjiang Key Laboratory of Signal Detection and Processing, Urumqi 830017, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xinjiang University, Urumqi 830017, China","institution_ids":["https://openalex.org/I96908189"]},{"raw_affiliation_string":"Xinjiang Key Laboratory of Signal Detection and Processing, Urumqi 830017, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049944728","display_name":"Liang He","orcid":"https://orcid.org/0000-0003-4076-7479"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liang He","raw_affiliation_strings":["Department of Electronic Engineering, and Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing 100084, China","School of Computer Science and Technology, Xinjiang University, Urumqi 830017, China","Xinjiang Key Laboratory of Signal Detection and Processing, Urumqi 830017, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, and Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing 100084, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"School of Computer Science and Technology, Xinjiang University, Urumqi 830017, China","institution_ids":["https://openalex.org/I96908189"]},{"raw_affiliation_string":"Xinjiang Key Laboratory of Signal Detection and Processing, Urumqi 830017, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020817636","display_name":"Zhida Song","orcid":"https://orcid.org/0000-0002-4987-5962"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhida Song","raw_affiliation_strings":["School of Computer Science and Technology, Xinjiang University, Urumqi 830017, China","Xinjiang Key Laboratory of Signal Detection and Processing, Urumqi 830017, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xinjiang University, Urumqi 830017, China","institution_ids":["https://openalex.org/I96908189"]},{"raw_affiliation_string":"Xinjiang Key Laboratory of Signal Detection and Processing, Urumqi 830017, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070059389","display_name":"Shaolei Wang","orcid":"https://orcid.org/0000-0002-0691-9179"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaolei Wang","raw_affiliation_strings":["School of Computer Science and Technology, Xinjiang University, Urumqi 830017, China","Xinjiang Key Laboratory of Signal Detection and Processing, Urumqi 830017, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xinjiang University, Urumqi 830017, China","institution_ids":["https://openalex.org/I96908189"]},{"raw_affiliation_string":"Xinjiang Key Laboratory of Signal Detection and Processing, Urumqi 830017, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5049944728"],"corresponding_institution_ids":["https://openalex.org/I96908189","https://openalex.org/I99065089"],"apc_list":{"value":2400,"currency":"CHF","value_usd":2598},"apc_paid":{"value":2400,"currency":"CHF","value_usd":2598},"fwci":0.3742,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.58675177,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"23","issue":"24","first_page":"9845","last_page":"9845"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7922955751419067},{"id":"https://openalex.org/keywords/nist","display_name":"NIST","score":0.7841784954071045},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6609699726104736},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.570180356502533},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5657870769500732},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5562260746955872},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5442796349525452},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5376793742179871},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.48539531230926514},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.4574311375617981},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4571736454963684},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.44133228063583374},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.42646270990371704},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41166603565216064},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3981632888317108},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3873976469039917},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.11221885681152344}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7922955751419067},{"id":"https://openalex.org/C111219384","wikidata":"https://www.wikidata.org/wiki/Q6954384","display_name":"NIST","level":2,"score":0.7841784954071045},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6609699726104736},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.570180356502533},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5657870769500732},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5562260746955872},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5442796349525452},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5376793742179871},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.48539531230926514},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.4574311375617981},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4571736454963684},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.44133228063583374},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.42646270990371704},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41166603565216064},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3981632888317108},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3873976469039917},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.11221885681152344},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000073256","descriptor_name":"Information Technology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000073256","descriptor_name":"Information Technology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000073256","descriptor_name":"Information Technology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000073256","descriptor_name":"Information Technology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D056667","descriptor_name":"Biometric Identification","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D056667","descriptor_name":"Biometric Identification","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D056667","descriptor_name":"Biometric Identification","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":4,"locations":[{"id":"doi:10.3390/s23249845","is_oa":true,"landing_page_url":"https://doi.org/10.3390/s23249845","pdf_url":"https://www.mdpi.com/1424-8220/23/24/9845/pdf?version=1702645976","source":{"id":"https://openalex.org/S101949793","display_name":"Sensors","issn_l":"1424-8220","issn":["1424-8220"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors","raw_type":"journal-article"},{"id":"pmid:38139689","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38139689","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors (Basel, Switzerland)","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:10747811","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/10747811","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC10747811/pdf/sensors-23-09845.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sensors (Basel)","raw_type":"Text"},{"id":"pmh:oai:doaj.org/article:c218800cbf694fb6a38c9cbfee0e9abf","is_oa":true,"landing_page_url":"https://doaj.org/article/c218800cbf694fb6a38c9cbfee0e9abf","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sensors, Vol 23, Iss 24, p 9845 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/s23249845","is_oa":true,"landing_page_url":"https://doi.org/10.3390/s23249845","pdf_url":"https://www.mdpi.com/1424-8220/23/24/9845/pdf?version=1702645976","source":{"id":"https://openalex.org/S101949793","display_name":"Sensors","issn_l":"1424-8220","issn":["1424-8220"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.4300000071525574,"display_name":"Industry, innovation and infrastructure"}],"awards":[{"id":"https://openalex.org/G4902648226","display_name":null,"funder_award_id":"2022ZD0115801","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4389913714.pdf"},"referenced_works_count":45,"referenced_works":["https://openalex.org/W1516392727","https://openalex.org/W1832693441","https://openalex.org/W1950843348","https://openalex.org/W1998808035","https://openalex.org/W2019370496","https://openalex.org/W2046056978","https://openalex.org/W2096733369","https://openalex.org/W2145287260","https://openalex.org/W2150769028","https://openalex.org/W2154383330","https://openalex.org/W2163605009","https://openalex.org/W2173629880","https://openalex.org/W2325939864","https://openalex.org/W2520774990","https://openalex.org/W2726515241","https://openalex.org/W2736633948","https://openalex.org/W2747238065","https://openalex.org/W2748488820","https://openalex.org/W2777661975","https://openalex.org/W2784163702","https://openalex.org/W2799429369","https://openalex.org/W2806318065","https://openalex.org/W2808631503","https://openalex.org/W2890964092","https://openalex.org/W2902299888","https://openalex.org/W2969985801","https://openalex.org/W3024869864","https://openalex.org/W3025804484","https://openalex.org/W3026006730","https://openalex.org/W3103152812","https://openalex.org/W3126259644","https://openalex.org/W3126757411","https://openalex.org/W3161402421","https://openalex.org/W3209112689","https://openalex.org/W4224924564","https://openalex.org/W4293222191","https://openalex.org/W4304775708","https://openalex.org/W4308448078","https://openalex.org/W4361853108","https://openalex.org/W4372259900","https://openalex.org/W4372265901","https://openalex.org/W4372346152","https://openalex.org/W4379985555","https://openalex.org/W4382060833","https://openalex.org/W4385823274"],"related_works":["https://openalex.org/W2158491338","https://openalex.org/W2807901368","https://openalex.org/W2133733652","https://openalex.org/W2072658171","https://openalex.org/W2606392311","https://openalex.org/W2320042380","https://openalex.org/W4385956668","https://openalex.org/W2900895161","https://openalex.org/W4380838366","https://openalex.org/W2539884462"],"abstract_inverted_index":{"With":[0],"the":[1,15,33,59,64,99,122,157,164,168,176,179,204,210,215,220,224,227,232,242,249],"rapid":[2],"development":[3,159],"of":[4,63,167,193,201,207,223,251],"multimedia":[5],"technology,":[6],"personnel":[7],"verification":[8,23,80,254],"systems":[9,24,81],"have":[10,25],"become":[11],"increasingly":[12],"important":[13],"in":[14,28,117,184],"security":[16],"field":[17],"and":[18,61,82,87,104,125,149,161,172,195,214,231],"identity":[19],"verification.":[20],"However,":[21],"unimodal":[22],"performance":[26,182,250],"bottlenecks":[27],"complex":[29],"scenarios,":[30],"thus":[31],"triggering":[32],"need":[34],"for":[35,66],"multimodal":[36,45,78,252],"feature":[37,46,124],"fusion":[38,47,107,123,141,152,244],"methods.":[39],"The":[40,114],"main":[41],"problem":[42],"with":[43],"audio-visual":[44],"is":[48],"how":[49,75,83],"to":[50,57,76,84,97],"effectively":[51],"integrate":[52],"information":[53],"from":[54,101],"different":[55],"modalities":[56],"improve":[58,77,248],"accuracy":[60],"robustness":[62],"system":[65,181],"individual":[67],"identity.":[68],"In":[69,90],"this":[70,91,118,136,185],"paper,":[71],"we":[72,93,138],"focus":[73],"on":[74,111,144,156,163],"person":[79],"combine":[85],"audio":[86],"visual":[88],"features.":[89],"study,":[92],"use":[94],"pretrained":[95],"models":[96,142,153],"extract":[98],"embeddings":[100],"each":[102],"modality":[103],"then":[105],"perform":[106],"model":[108],"experiments":[109],"based":[110,143],"these":[112],"embeddings.":[113],"baseline":[115],"approach":[116],"paper":[119],"involves":[120],"taking":[121],"passing":[126],"it":[127],"through":[128],"a":[129,196],"fully":[130],"connected":[131],"(FC)":[132],"layer.":[133],"Building":[134],"upon":[135],"baseline,":[137],"propose":[139],"three":[140],"attentional":[145],"mechanisms:":[146],"attention,":[147],"gated,":[148],"inter-attention.":[150],"These":[151,236],"are":[154],"trained":[155],"VoxCeleb1":[158,177],"set":[160,206,222],"tested":[162],"evaluation":[165,205,221],"sets":[166],"VoxCeleb1,":[169],"NIST":[170,208],"SRE19,":[171,209],"CNC-AV":[173,225],"datasets.":[174],"On":[175,203,219],"dataset,":[178],"best":[180],"achieved":[183],"study":[186],"was":[187,212,217,229,234],"an":[188],"equal":[189],"error":[190],"rate":[191],"(EER)":[192],"0.23%":[194],"detection":[197],"cost":[198],"function":[199],"(minDCF)":[200],"0.011.":[202],"EER":[211,228],"2.60%":[213],"minDCF":[216,233],"0.283.":[218],"set,":[226],"11.30%":[230],"0.443.":[235],"experimental":[237],"results":[238],"strongly":[239],"demonstrate":[240],"that":[241],"proposed":[243],"method":[245],"can":[246],"significantly":[247],"character":[253],"systems.":[255]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-05-24T08:33:08.758527","created_date":"2025-10-10T00:00:00"}
