{"id":"https://openalex.org/W2612266954","doi":"https://doi.org/10.1109/icassp.2017.7953098","title":"Speaker diarization: A perspective on challenges and opportunities from theory to practice","display_name":"Speaker diarization: A perspective on challenges and opportunities from theory to practice","publication_year":2017,"publication_date":"2017-03-01","ids":{"openalex":"https://openalex.org/W2612266954","doi":"https://doi.org/10.1109/icassp.2017.7953098","mag":"2612266954"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2017.7953098","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2017.7953098","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016543371","display_name":"Kenneth Church","orcid":"https://orcid.org/0000-0001-8378-6069"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kenneth Church","raw_affiliation_strings":["IBM, USA"],"affiliations":[{"raw_affiliation_string":"IBM, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103551993","display_name":"Weizhong Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weizhong Zhu","raw_affiliation_strings":["IBM, USA"],"affiliations":[{"raw_affiliation_string":"IBM, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002475003","display_name":"Josef Vopicka","orcid":null},"institutions":[{"id":"https://openalex.org/I4210149077","display_name":"IBM (Czechia)","ror":"https://ror.org/04dzqh472","country_code":"CZ","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210149077"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Josef Vopicka","raw_affiliation_strings":["IBM, Czech Republic"],"affiliations":[{"raw_affiliation_string":"IBM, Czech Republic","institution_ids":["https://openalex.org/I4210149077"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029030495","display_name":"Jason Pelecanos","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason Pelecanos","raw_affiliation_strings":["IBM, USA"],"affiliations":[{"raw_affiliation_string":"IBM, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115044944","display_name":"Dimitrios Dimitriadis","orcid":"https://orcid.org/0000-0001-8483-0105"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dimitrios Dimitriadis","raw_affiliation_strings":["IBM, USA"],"affiliations":[{"raw_affiliation_string":"IBM, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006171209","display_name":"Petr Fousek","orcid":null},"institutions":[{"id":"https://openalex.org/I4210149077","display_name":"IBM (Czechia)","ror":"https://ror.org/04dzqh472","country_code":"CZ","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210149077"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Petr Fousek","raw_affiliation_strings":["IBM, Czech Republic"],"affiliations":[{"raw_affiliation_string":"IBM, Czech Republic","institution_ids":["https://openalex.org/I4210149077"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5016543371"],"corresponding_institution_ids":["https://openalex.org/I1341412227"],"apc_list":null,"apc_paid":null,"fwci":0.9751,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.81471244,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"4950","last_page":"4954"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9797999858856201,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9757000207901001,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.7818058729171753},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6105003952980042},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.44601890444755554},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.2843702435493469},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24057593941688538},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.19137093424797058}],"concepts":[{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.7818058729171753},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6105003952980042},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.44601890444755554},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2843702435493469},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24057593941688538},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.19137093424797058}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2017.7953098","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2017.7953098","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1502693945","https://openalex.org/W1965819578","https://openalex.org/W2018642651","https://openalex.org/W2023582935","https://openalex.org/W2038101708","https://openalex.org/W2081074144","https://openalex.org/W2093499222","https://openalex.org/W2106793713","https://openalex.org/W2144125830","https://openalex.org/W2150769028","https://openalex.org/W2159591770","https://openalex.org/W2166980079","https://openalex.org/W2397837022","https://openalex.org/W2405137844","https://openalex.org/W2408591905","https://openalex.org/W2497516627","https://openalex.org/W2525831154","https://openalex.org/W2916986993","https://openalex.org/W2963490782","https://openalex.org/W6630205719","https://openalex.org/W6639288305","https://openalex.org/W6714141289"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W4389272081","https://openalex.org/W135211875","https://openalex.org/W2052269122","https://openalex.org/W89843531","https://openalex.org/W1549221042","https://openalex.org/W4319586547","https://openalex.org/W3011547127","https://openalex.org/W3008107337","https://openalex.org/W2807391387"],"abstract_inverted_index":{"This":[0],"paper":[1],"discusses":[2],"some":[3,24],"challenges":[4],"and":[5,37,50,70,81,99],"opportunities":[6],"in":[7,40,66],"developing":[8],"a":[9,29],"speaker":[10,68,79,91],"diarization":[11,69,80],"system":[12],"for":[13],"operation":[14],"on":[15],"real":[16],"world":[17],"call":[18,41,93],"center":[19,94],"telephony":[20],"data.":[21],"We":[22],"contrast":[23],"of":[25,121],"the":[26,89,105,109,119],"differences":[27,46],"between":[28],"standard":[30],"data":[31],"set":[32],"akin":[33],"to":[34,53,87,117],"NIST":[35],"evaluations":[36],"those":[38],"found":[39],"centers.":[42],"In":[43,56],"exploring":[44],"these":[45],"we":[47,62,76],"discovered":[48],"vulnerabilities":[49],"proposed":[51],"changes":[52],"address":[54],"them.":[55],"moving":[57],"from":[58],"theory":[59],"into":[60],"practice":[61],"introduce":[63],"two":[64],"tasks":[65],"which":[67],"recognition":[71,82,112],"can":[72,84,114],"be":[73,85,115],"leveraged.":[74],"First,":[75],"show":[77],"that":[78],"systems":[83],"integrated":[86],"find":[88],"common":[90],"(the":[92],"agent)":[95],"across":[96],"multiple":[97],"calls":[98],"consequently":[100],"their":[101],"role.":[102],"Furthermore,":[103],"once":[104],"role":[106],"is":[107],"determined":[108],"corresponding":[110],"speech":[111],"output":[113],"analyzed":[116],"determine":[118],"type":[120],"support":[122],"call.":[123]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2017,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
