{"id":"https://openalex.org/W2991553872","doi":"https://doi.org/10.3390/s19235163","title":"Multimodal Speaker Diarization Using a Pre-Trained Audio-Visual Synchronization Model","display_name":"Multimodal Speaker Diarization Using a Pre-Trained Audio-Visual Synchronization Model","publication_year":2019,"publication_date":"2019-11-25","ids":{"openalex":"https://openalex.org/W2991553872","doi":"https://doi.org/10.3390/s19235163","mag":"2991553872","pmid":"https://pubmed.ncbi.nlm.nih.gov/31775385"},"language":"en","primary_location":{"id":"doi:10.3390/s19235163","is_oa":true,"landing_page_url":"https://doi.org/10.3390/s19235163","pdf_url":"https://www.mdpi.com/1424-8220/19/23/5163/pdf?version=1575439577","source":{"id":"https://openalex.org/S101949793","display_name":"Sensors","issn_l":"1424-8220","issn":["1424-8220"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1424-8220/19/23/5163/pdf?version=1575439577","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060629604","display_name":"Rehan Ahmad","orcid":"https://orcid.org/0000-0002-0194-6653"},"institutions":[{"id":"https://openalex.org/I121243025","display_name":"International Islamic University, Islamabad","ror":"https://ror.org/047w75g40","country_code":"PK","type":"education","lineage":["https://openalex.org/I121243025"]}],"countries":["PK"],"is_corresponding":true,"raw_author_name":"Rehan Ahmad","raw_affiliation_strings":["Department of Electrical Engineering, International Islamic University, Islamabad 44000, Pakistan"],"raw_orcid":"https://orcid.org/0000-0002-0194-6653","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, International Islamic University, Islamabad 44000, Pakistan","institution_ids":["https://openalex.org/I121243025"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043995656","display_name":"Syed Zubair","orcid":"https://orcid.org/0000-0003-0897-2448"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Syed Zubair","raw_affiliation_strings":["Analytics Camp, Islamabad 44000, Pakistan"],"raw_orcid":"https://orcid.org/0000-0003-0897-2448","affiliations":[{"raw_affiliation_string":"Analytics Camp, Islamabad 44000, Pakistan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090343976","display_name":"Hani Alquhayz","orcid":"https://orcid.org/0000-0001-8445-7742"},"institutions":[{"id":"https://openalex.org/I195631090","display_name":"Majmaah University","ror":"https://ror.org/01mcrnj60","country_code":"SA","type":"education","lineage":["https://openalex.org/I195631090"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Hani Alquhayz","raw_affiliation_strings":["Department of Computer Science and Information, College of Science in Zulfi, Majmaah University, Al-Majmaah 11952, Saudi Arabia"],"raw_orcid":"https://orcid.org/0000-0001-8445-7742","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Information, College of Science in Zulfi, Majmaah University, Al-Majmaah 11952, Saudi Arabia","institution_ids":["https://openalex.org/I195631090"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020009828","display_name":"Allah Ditta","orcid":"https://orcid.org/0000-0003-1519-5982"},"institutions":[{"id":"https://openalex.org/I5100685","display_name":"University of Education","ror":"https://ror.org/052z7nw84","country_code":"PK","type":"education","lineage":["https://openalex.org/I5100685"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Allah Ditta","raw_affiliation_strings":["Division of Science &amp; Technology, University of Education, Township, Lahore 54770, Pakistan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Division of Science &amp; Technology, University of Education, Township, Lahore 54770, Pakistan","institution_ids":["https://openalex.org/I5100685"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5060629604"],"corresponding_institution_ids":["https://openalex.org/I121243025"],"apc_list":{"value":2400,"currency":"CHF","value_usd":2598},"apc_paid":{"value":2400,"currency":"CHF","value_usd":2598},"fwci":0.3338,"has_fulltext":true,"cited_by_count":13,"citation_normalized_percentile":{"value":0.58076104,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"19","issue":"23","first_page":"5163","last_page":"5163"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.853941798210144},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8040667176246643},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.736728310585022},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.5248891711235046},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4963453412055969},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.487884521484375},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4526869058609009},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.429060697555542},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.426601767539978},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37465018033981323},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.31450599431991577},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.07300177216529846}],"concepts":[{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.853941798210144},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8040667176246643},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.736728310585022},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.5248891711235046},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4963453412055969},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.487884521484375},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4526869058609009},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.429060697555542},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.426601767539978},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37465018033981323},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.31450599431991577},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.07300177216529846},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3390/s19235163","is_oa":true,"landing_page_url":"https://doi.org/10.3390/s19235163","pdf_url":"https://www.mdpi.com/1424-8220/19/23/5163/pdf?version=1575439577","source":{"id":"https://openalex.org/S101949793","display_name":"Sensors","issn_l":"1424-8220","issn":["1424-8220"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors","raw_type":"journal-article"},{"id":"pmid:31775385","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/31775385","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors (Basel, Switzerland)","raw_type":null},{"id":"pmh:oai:mdpi.com:/1424-8220/19/23/5163/","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3390/s19235163","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sensors","raw_type":"Text"},{"id":"pmh:oai:pubmedcentral.nih.gov:6929047","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/6929047","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sensors (Basel)","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/s19235163","is_oa":true,"landing_page_url":"https://doi.org/10.3390/s19235163","pdf_url":"https://www.mdpi.com/1424-8220/19/23/5163/pdf?version=1575439577","source":{"id":"https://openalex.org/S101949793","display_name":"Sensors","issn_l":"1424-8220","issn":["1424-8220"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sensors","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.46000000834465027,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W2991553872.pdf"},"referenced_works_count":66,"referenced_works":["https://openalex.org/W86660359","https://openalex.org/W1482605500","https://openalex.org/W1539515721","https://openalex.org/W1562651896","https://openalex.org/W1583773829","https://openalex.org/W1584960981","https://openalex.org/W1595802121","https://openalex.org/W1608667326","https://openalex.org/W1689977300","https://openalex.org/W1762918658","https://openalex.org/W1937412845","https://openalex.org/W1965819578","https://openalex.org/W1971791733","https://openalex.org/W1971901154","https://openalex.org/W1991048564","https://openalex.org/W2005708641","https://openalex.org/W2049893888","https://openalex.org/W2084637638","https://openalex.org/W2086086554","https://openalex.org/W2096391593","https://openalex.org/W2106488367","https://openalex.org/W2115175902","https://openalex.org/W2115252128","https://openalex.org/W2118847468","https://openalex.org/W2123921937","https://openalex.org/W2124247842","https://openalex.org/W2126432406","https://openalex.org/W2136879537","https://openalex.org/W2142352693","https://openalex.org/W2147520277","https://openalex.org/W2150769028","https://openalex.org/W2153842971","https://openalex.org/W2165232124","https://openalex.org/W2169165592","https://openalex.org/W2187637362","https://openalex.org/W2191779130","https://openalex.org/W2236062296","https://openalex.org/W2250436945","https://openalex.org/W2283724112","https://openalex.org/W2316138215","https://openalex.org/W2338994564","https://openalex.org/W2408333118","https://openalex.org/W2525932165","https://openalex.org/W2604379605","https://openalex.org/W2638067502","https://openalex.org/W2743167696","https://openalex.org/W2746241180","https://openalex.org/W2750259098","https://openalex.org/W2796898951","https://openalex.org/W2889203268","https://openalex.org/W2890964092","https://openalex.org/W2896538040","https://openalex.org/W2950628382","https://openalex.org/W2963470929","https://openalex.org/W2963702081","https://openalex.org/W3100703861","https://openalex.org/W4285719527","https://openalex.org/W6628911050","https://openalex.org/W6634978355","https://openalex.org/W6637929209","https://openalex.org/W6659344013","https://openalex.org/W6677618333","https://openalex.org/W6682600690","https://openalex.org/W6684648996","https://openalex.org/W6735927292","https://openalex.org/W6760001989"],"related_works":["https://openalex.org/W2041903645","https://openalex.org/W2039585813","https://openalex.org/W2059891707","https://openalex.org/W210813388","https://openalex.org/W2362790197","https://openalex.org/W2041695853","https://openalex.org/W3087422378","https://openalex.org/W2185667427","https://openalex.org/W2389696717","https://openalex.org/W2548055032"],"abstract_inverted_index":{"Speaker":[0],"diarization":[1,34,212],"systems":[2],"aim":[3],"to":[4,54,87,126,188,206],"find":[5,55],"'who":[6],"spoke":[7],"when?'":[8],"in":[9,136,182],"multi-speaker":[10],"recordings.":[11,24,172],"The":[12,197],"dataset":[13],"usually":[14],"consists":[15],"of":[16,73,111,152,157,160,167,170,184,199,216],"meetings,":[17],"TV/talk":[18],"shows,":[19],"telephone":[20],"and":[21,62,83,164,190],"multi-party":[22],"interaction":[23],"In":[25],"this":[26],"paper,":[27],"we":[28],"propose":[29],"a":[30,59,79,94,149],"novel":[31],"multimodal":[32,171,211],"speaker":[33,40,138,195],"technique,":[35],"which":[36,98,213],"finds":[37],"the":[38,56,63,88,109,118,120,179,200,207],"active":[39],"through":[41],"audio-visual":[42,49],"synchronization":[43,50,57],"model":[44,51,92,130],"for":[45,162],"diarization.":[46,196],"A":[47,173],"pre-trained":[48,89],"is":[52,93,176],"used":[53,125],"between":[58],"visible":[60],"person":[61],"respective":[64,104,121],"audio.":[65],"For":[66],"that":[67],"purpose,":[68],"short":[69],"video":[70,114],"segments":[71,115],"comprised":[72],"face-only":[74],"regions":[75],"are":[76,84,124,203],"acquired":[77],"using":[78],"face":[80],"detection":[81],"technique":[82,202],"then":[85],"fed":[86],"model.":[90],"This":[91,133],"two":[95],"streamed":[96],"network":[97],"matches":[99],"audio":[100,122,163,193],"frames":[101,123],"with":[102,141,178],"their":[103],"visual":[105],"input":[106],"segments.":[107],"On":[108],"basis":[110],"high":[112,142],"confidence":[113],"inferred":[116],"by":[117],"model,":[119],"train":[127],"Gaussian":[128],"mixture":[129],"(GMM)-based":[131],"clusters.":[132],"method":[134,181],"helps":[135],"generating":[137],"specific":[139],"clusters":[140],"probability.":[143],"We":[144],"tested":[145],"our":[146],"approach":[147],"on":[148],"popular":[150],"subset":[151],"AMI":[153],"meeting":[154],"corpus":[155],"consisting":[156],"5.4":[158],"h":[159,166],"recordings":[161],"5.8":[165],"different":[168],"set":[169],"significant":[174],"improvement":[175],"noticed":[177],"proposed":[180,201],"term":[183],"DER":[185],"when":[186],"compared":[187],"conventional":[189],"fully":[191],"supervised":[192],"based":[194],"results":[198],"very":[204],"close":[205],"complex":[208],"state-of-the":[209],"art":[210],"shows":[214],"significance":[215],"such":[217],"simple":[218],"yet":[219],"effective":[220],"technique.":[221]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-01-15T23:16:33.117629","created_date":"2025-10-10T00:00:00"}
