{"id":"https://openalex.org/W2129770175","doi":"https://doi.org/10.21437/interspeech.2014-141","title":"Speaker diarization using gesture and speech","display_name":"Speaker diarization using gesture and speech","publication_year":2014,"publication_date":"2014-09-14","ids":{"openalex":"https://openalex.org/W2129770175","doi":"https://doi.org/10.21437/interspeech.2014-141","mag":"2129770175"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2014-141","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2014-141","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2014","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://hdl.handle.net/11858/00-001M-0000-0019-B65B-7","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010428903","display_name":"Binyam Gebrekidan Gebre","orcid":"https://orcid.org/0000-0003-2656-2055"},"institutions":[{"id":"https://openalex.org/I149899117","display_name":"Max Planck Society","ror":"https://ror.org/01hhn8329","country_code":"DE","type":"nonprofit","lineage":["https://openalex.org/I149899117"]},{"id":"https://openalex.org/I4210089003","display_name":"Max Planck Institute for Psycholinguistics","ror":"https://ror.org/00671me87","country_code":"NL","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210089003"]}],"countries":["DE","NL"],"is_corresponding":false,"raw_author_name":"Binyam Gebrekidan Gebre","raw_affiliation_strings":["The Language Archive, MPI for Psycholinguistics, Max Planck Society"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Language Archive, MPI for Psycholinguistics, Max Planck Society","institution_ids":["https://openalex.org/I4210089003","https://openalex.org/I149899117"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056816256","display_name":"Peter Wittenburg","orcid":"https://orcid.org/0000-0003-3538-0106"},"institutions":[{"id":"https://openalex.org/I149899117","display_name":"Max Planck Society","ror":"https://ror.org/01hhn8329","country_code":"DE","type":"nonprofit","lineage":["https://openalex.org/I149899117"]},{"id":"https://openalex.org/I4210089003","display_name":"Max Planck Institute for Psycholinguistics","ror":"https://ror.org/00671me87","country_code":"NL","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210089003"]}],"countries":["DE","NL"],"is_corresponding":false,"raw_author_name":"Peter Wittenburg","raw_affiliation_strings":["The Language Archive, MPI for Psycholinguistics, Max Planck Society"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Language Archive, MPI for Psycholinguistics, Max Planck Society","institution_ids":["https://openalex.org/I4210089003","https://openalex.org/I149899117"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001185195","display_name":"Sebastian Drude","orcid":"https://orcid.org/0000-0002-2970-7996"},"institutions":[{"id":"https://openalex.org/I149899117","display_name":"Max Planck Society","ror":"https://ror.org/01hhn8329","country_code":"DE","type":"nonprofit","lineage":["https://openalex.org/I149899117"]},{"id":"https://openalex.org/I4210089003","display_name":"Max Planck Institute for Psycholinguistics","ror":"https://ror.org/00671me87","country_code":"NL","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210089003"]}],"countries":["DE","NL"],"is_corresponding":false,"raw_author_name":"Sebastian Drude","raw_affiliation_strings":["The Language Archive, MPI for Psycholinguistics, Max Planck Society"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Language Archive, MPI for Psycholinguistics, Max Planck Society","institution_ids":["https://openalex.org/I4210089003","https://openalex.org/I149899117"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111969286","display_name":"Marijn Huijbregts","orcid":null},"institutions":[{"id":"https://openalex.org/I145872427","display_name":"Radboud University Nijmegen","ror":"https://ror.org/016xsfp80","country_code":"NL","type":"education","lineage":["https://openalex.org/I145872427"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Marijn Huijbregts","raw_affiliation_strings":["RADBOUD UNIVERSITY"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"RADBOUD UNIVERSITY","institution_ids":["https://openalex.org/I145872427"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015444349","display_name":"Tom Heskes","orcid":"https://orcid.org/0000-0002-3398-5235"},"institutions":[{"id":"https://openalex.org/I145872427","display_name":"Radboud University Nijmegen","ror":"https://ror.org/016xsfp80","country_code":"NL","type":"education","lineage":["https://openalex.org/I145872427"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Tom Heskes","raw_affiliation_strings":["RADBOUD UNIVERSITY"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"RADBOUD UNIVERSITY","institution_ids":["https://openalex.org/I145872427"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4229,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.76137417,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"582","last_page":"586"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.8804576396942139},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7503652572631836},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6769177913665771},{"id":"https://openalex.org/keywords/gesture","display_name":"Gesture","score":0.5997456908226013},{"id":"https://openalex.org/keywords/association","display_name":"Association (psychology)","score":0.5091831088066101},{"id":"https://openalex.org/keywords/speech-communication","display_name":"Speech communication","score":0.486825168132782},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.42799362540245056},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24294611811637878},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.23025479912757874},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.10742861032485962}],"concepts":[{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.8804576396942139},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7503652572631836},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6769177913665771},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.5997456908226013},{"id":"https://openalex.org/C142853389","wikidata":"https://www.wikidata.org/wiki/Q744778","display_name":"Association (psychology)","level":2,"score":0.5091831088066101},{"id":"https://openalex.org/C2989496772","wikidata":"https://www.wikidata.org/wiki/Q52946","display_name":"Speech communication","level":2,"score":0.486825168132782},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.42799362540245056},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24294611811637878},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.23025479912757874},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.10742861032485962},{"id":"https://openalex.org/C542102704","wikidata":"https://www.wikidata.org/wiki/Q183257","display_name":"Psychotherapist","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.21437/interspeech.2014-141","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2014-141","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2014","raw_type":"proceedings-article"},{"id":"pmh:oai:escidoc.org:escidoc:2032454","is_oa":true,"landing_page_url":"http://hdl.handle.net/11858/00-001M-0000-0019-B65B-7","pdf_url":"http://hdl.handle.net/11858/00-001M-0000-0019-B65B-7","source":{"id":"https://openalex.org/S7407052962","display_name":"Max Planck Digital Library","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"pmh:oai:repository.ubn.ru.nl:2066/134621","is_oa":true,"landing_page_url":"https://hdl.handle.net/2066/134621","pdf_url":"http://hdl.handle.net/2066/134621","source":{"id":"https://openalex.org/S4306401067","display_name":"Radboud Repository (Radboud University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I145872427","host_organization_name":"Radboud University Nijmegen","host_organization_lineage":["https://openalex.org/I145872427"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://www.isca-archive.org/interspeech_2014/gebre14_interspeech.pdf","raw_type":"Article in monograph or in proceedings"},{"id":"pmh:ru:oai:repository.ubn.ru.nl:2066/134621","is_oa":true,"landing_page_url":"http://hdl.handle.net/2066/134621","pdf_url":null,"source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Interspeech 2014: Proceedings of the 15th Annual Conference of the International Speech Communication Association, 14-18 Sept 2014, MAX Atria @ Singapore EXPO, 582 - 586. s.l. : ISCA","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:escidoc.org:escidoc:2032454","is_oa":true,"landing_page_url":"http://hdl.handle.net/11858/00-001M-0000-0019-B65B-7","pdf_url":"http://hdl.handle.net/11858/00-001M-0000-0019-B65B-7","source":{"id":"https://openalex.org/S7407052962","display_name":"Max Planck Digital Library","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/conferenceObject"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W2129770175.pdf"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W88081813","https://openalex.org/W142991516","https://openalex.org/W1556474518","https://openalex.org/W1583620810","https://openalex.org/W1589339348","https://openalex.org/W1591300715","https://openalex.org/W1975745930","https://openalex.org/W2041823554","https://openalex.org/W2049633694","https://openalex.org/W2081074144","https://openalex.org/W2100969003","https://openalex.org/W2101234009","https://openalex.org/W2101600527","https://openalex.org/W2104380558","https://openalex.org/W2105890227","https://openalex.org/W2115175902","https://openalex.org/W2125336414","https://openalex.org/W2136032552","https://openalex.org/W2144761589","https://openalex.org/W2148154194","https://openalex.org/W2153842971","https://openalex.org/W2159591770","https://openalex.org/W2159730002","https://openalex.org/W2161969291","https://openalex.org/W2165880886","https://openalex.org/W2338994564","https://openalex.org/W2620757702","https://openalex.org/W3097096317"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W4247736853","https://openalex.org/W2162158162","https://openalex.org/W1493012537","https://openalex.org/W1999004162","https://openalex.org/W2175373321","https://openalex.org/W2125642021","https://openalex.org/W1521049138","https://openalex.org/W2938358845","https://openalex.org/W2997340161"],"abstract_inverted_index":{"We":[0],"demonstrate":[1],"how":[2],"the":[3,27,51,56,61,64,103,134,162],"problem":[4,30,35],"of":[5,20,48,53,58,63,106,133,146],"speaker":[6,15,28,33,38],"diarization":[7,29],"can":[8],"be":[9],"solved":[10],"using":[11],"both":[12],"gesture":[13],"and":[14,55,77,91,109,112,151],"parametric":[16,83],"models.":[17],"The":[18],"novelty":[19],"our":[21,81,140],"solution":[22,141],"is":[23,160],"that":[24,139],"we":[25],"approach":[26,68],"as":[31],"a":[32],"recognition":[34],"after":[36],"learning":[37],"models":[39,84,98],"from":[40,125],"speech":[41,54],"samples":[42],"corresponding":[43],"to":[44,87,101,117],"gestures":[45,49,59],"(the":[46,158],"occurrence":[47],"indicates":[50,60],"presence":[52],"location":[57],"identity":[62],"speaker).":[65],"This":[66],"new":[67],"offers":[69],"many":[70],"advantages:":[71],"comparable":[72],"state-of-the-art":[73],"performance,":[74],"faster":[75],"computation":[76],"more":[78,94],"adaptability.":[79],"In":[80],"implementation,":[82],"are":[85,99,115],"used":[86,100,116],"model":[88,102,118],"speakers'":[89],"voice":[90,104],"their":[92],"gestures:":[93],"specifically,":[95],"Gaussian":[96],"mixture":[97],"characteristics":[105],"each":[107],"person":[108],"all":[110,154],"persons,":[111],"gamma":[113],"distributions":[114],"gestural":[119],"activity":[120],"based":[121],"on":[122,130,148,153],"features":[123],"extracted":[124],"Motion":[126],"History":[127],"Images.":[128],"Tests":[129],"4.24":[131],"hours":[132],"AMI":[135,163],"meeting":[136],"data":[137],"show":[138],"makes":[142],"DER":[143],"score":[144],"improvements":[145],"19%":[147],"speech-only":[149],"segments":[150,155],"4%":[152],"including":[156],"silence":[157],"comparison":[159],"with":[161],"system).":[164]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2015,"cited_by_count":1}],"updated_date":"2026-06-22T08:00:12.763002","created_date":"2025-10-10T00:00:00"}
