{"id":"https://openalex.org/W3165996626","doi":"https://doi.org/10.21437/interspeech.2021-1864","title":"Speaker-Conversation Factorial Designs for Diarization Error Analysis","display_name":"Speaker-Conversation Factorial Designs for Diarization Error Analysis","publication_year":2021,"publication_date":"2021-08-27","ids":{"openalex":"https://openalex.org/W3165996626","doi":"https://doi.org/10.21437/interspeech.2021-1864","mag":"3165996626"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2021-1864","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-1864","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2106.05792","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061545233","display_name":"Scott Seyfarth","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Scott Seyfarth","raw_affiliation_strings":["Amazon"],"affiliations":[{"raw_affiliation_string":"Amazon","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047128701","display_name":"Sundararajan Srinivasan","orcid":"https://orcid.org/0000-0002-3387-9889"},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sundararajan Srinivasan","raw_affiliation_strings":["Amazon"],"affiliations":[{"raw_affiliation_string":"Amazon","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050325468","display_name":"Katrin Kirchhoff","orcid":"https://orcid.org/0000-0002-6645-6030"},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Katrin Kirchhoff","raw_affiliation_strings":["Amazon"],"affiliations":[{"raw_affiliation_string":"Amazon","institution_ids":["https://openalex.org/I4210089985"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5061545233"],"corresponding_institution_ids":["https://openalex.org/I4210089985"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06533509,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"756","last_page":"760"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.8509117364883423},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.8392815589904785},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6838489174842834},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6641643047332764},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.47512751817703247},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4725251793861389},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.4683491587638855},{"id":"https://openalex.org/keywords/factorial","display_name":"Factorial","score":0.45284098386764526},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35358405113220215},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3260570168495178},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.3129898011684418},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.1998630166053772},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10771843791007996},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.07724446058273315}],"concepts":[{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.8509117364883423},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.8392815589904785},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6838489174842834},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6641643047332764},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.47512751817703247},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4725251793861389},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.4683491587638855},{"id":"https://openalex.org/C183763347","wikidata":"https://www.wikidata.org/wiki/Q120976","display_name":"Factorial","level":2,"score":0.45284098386764526},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35358405113220215},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3260570168495178},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.3129898011684418},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.1998630166053772},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10771843791007996},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.07724446058273315},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.21437/interspeech.2021-1864","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-1864","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2106.05792","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.05792","pdf_url":"https://arxiv.org/pdf/2106.05792","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"mag:3165996626","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2106.05792","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:share.osf.io:94f4ac68-8299-47ce-adc2-f8b12a0dbf51","is_oa":false,"landing_page_url":"https://osf.io/32vpc","pdf_url":null,"source":{"id":"https://openalex.org/S4306401127","display_name":"OSF Preprints (OSF Preprints)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2799848540","host_organization_name":"Center for Open Science","host_organization_lineage":["https://openalex.org/I2799848540"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Project"},{"id":"pmh:oai:share.osf.io:E0017-2DB-94C","is_oa":false,"landing_page_url":"http://osf.io/32vpc/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401127","display_name":"OSF Preprints (OSF Preprints)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2799848540","host_organization_name":"Center for Open Science","host_organization_lineage":["https://openalex.org/I2799848540"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"project"},{"id":"doi:10.48550/arxiv.2106.05792","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2106.05792","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2106.05792","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.05792","pdf_url":"https://arxiv.org/pdf/2106.05792","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[{"display_name":"Gender equality","id":"https://metadata.un.org/sdg/5","score":0.4699999988079071},{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4099999964237213}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3165996626.pdf","grobid_xml":"https://content.openalex.org/works/W3165996626.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W97072897","https://openalex.org/W1524333225","https://openalex.org/W1579840820","https://openalex.org/W1589137271","https://openalex.org/W1949678325","https://openalex.org/W1963934770","https://openalex.org/W2032330493","https://openalex.org/W2085464700","https://openalex.org/W2105890227","https://openalex.org/W2106103082","https://openalex.org/W2121812409","https://openalex.org/W2130375591","https://openalex.org/W2136032552","https://openalex.org/W2153555222","https://openalex.org/W2162634167","https://openalex.org/W2165758834","https://openalex.org/W2398324643","https://openalex.org/W2577537660","https://openalex.org/W2638067502","https://openalex.org/W2805869973","https://openalex.org/W2888586828","https://openalex.org/W2890964092","https://openalex.org/W2936780106","https://openalex.org/W2950628382","https://openalex.org/W2962758893","https://openalex.org/W2972949456","https://openalex.org/W3008357631","https://openalex.org/W3015783745","https://openalex.org/W3125596972"],"related_works":["https://openalex.org/W3197109309","https://openalex.org/W2398628094","https://openalex.org/W2398046462","https://openalex.org/W2069642050","https://openalex.org/W2746023138","https://openalex.org/W3005625664","https://openalex.org/W1658556320","https://openalex.org/W164219236","https://openalex.org/W2407688633","https://openalex.org/W2575447002","https://openalex.org/W780134238","https://openalex.org/W3212278860","https://openalex.org/W1502693945","https://openalex.org/W1538006706","https://openalex.org/W3102865055","https://openalex.org/W135211875","https://openalex.org/W2463927909","https://openalex.org/W2296479786","https://openalex.org/W2119968850","https://openalex.org/W2950939535"],"abstract_inverted_index":{"Speaker":[0],"diarization":[1,16,36,57,82,147],"accuracy":[2,37,58,78,113],"can":[3,46,140],"be":[4,141],"affected":[5],"by":[6,59],"both":[7],"acoustics":[8,23],"and":[9,24,28,35,53,76,98,145],"conversation":[10,25,54],"characteristics.":[11],"Determining":[12],"the":[13,29,116,129,138],"cause":[14],"of":[15,51,93],"errors":[17],"is":[18,71],"difficult":[19],"because":[20],"speaker":[21],"voice":[22],"structure":[26],"co-vary,":[27],"interactions":[30],"between":[31],"acoustics,":[32],"conversational":[33,122],"structure,":[34,123],"are":[38,125],"complex.":[39],"This":[40],"paper":[41],"proposes":[42],"a":[43,63,84,91,99,104],"methodology":[44,139],"that":[45],"distinguish":[47],"independent":[48],"marginal":[49],"effects":[50],"acoustic":[52],"characteristics":[55],"on":[56,103],"remixing":[60],"conversations":[61],"in":[62,128],"factorial":[64],"design.":[65],"As":[66],"an":[67],"illustration,":[68],"this":[69],"approach":[70],"used":[72,142],"to":[73,121,143],"investigate":[74],"gender-related":[75],"language-related":[77],"differences":[79],"with":[80,95],"three":[81],"systems:":[83],"baseline":[85,117],"system":[86,101,118],"using":[87],"subsegment":[88],"x-vector":[89],"clustering,":[90],"variant":[92],"it":[94],"shorter":[96],"subsegments,":[97],"third":[100],"based":[102],"Bayesian":[105],"hidden":[106],"Markov":[107],"model.":[108],"Our":[109],"analysis":[110],"shows":[111],"large":[112],"disparities":[114],"for":[115],"primarily":[119],"due":[120],"which":[124],"partially":[126],"mitigated":[127],"other":[130],"two":[131],"systems.":[132],"The":[133],"illustration":[134],"thus":[135],"demonstrates":[136],"how":[137],"identify":[144],"guide":[146],"model":[148],"improvements.":[149]},"counts_by_year":[],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
