{"id":"https://openalex.org/W4283817920","doi":"https://doi.org/10.21437/interspeech.2022-75","title":"Automatic Evaluation of Speaker Similarity","display_name":"Automatic Evaluation of Speaker Similarity","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4283817920","doi":"https://doi.org/10.21437/interspeech.2022-75"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-75","is_oa":true,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-75","pdf_url":"https://www.isca-archive.org/interspeech_2022/deja22_interspeech.pdf","source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://www.isca-archive.org/interspeech_2022/deja22_interspeech.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026378980","display_name":"Kamil Rafa\u0142 Deja","orcid":"https://orcid.org/0000-0003-1156-5544"},"institutions":[{"id":"https://openalex.org/I108403487","display_name":"Warsaw University of Technology","ror":"https://ror.org/00y0xnp53","country_code":"PL","type":"education","lineage":["https://openalex.org/I108403487"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Kamil Deja","raw_affiliation_strings":["Warsaw University of Technology, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Warsaw University of Technology, Poland","institution_ids":["https://openalex.org/I108403487"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107424977","display_name":"Ariadna S\u00e1nchez","orcid":"https://orcid.org/0000-0003-0409-1328"},"institutions":[{"id":"https://openalex.org/I4210123934","display_name":"Amazon (United Kingdom)","ror":"https://ror.org/02xey9634","country_code":"GB","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210123934"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ariadna Sanchez","raw_affiliation_strings":["Amazon Research, Cambridge, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Research, Cambridge, United Kingdom","institution_ids":["https://openalex.org/I4210123934"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053794772","display_name":"Julian Roth","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Julian Roth","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5038640628","display_name":"Marius Cotescu","orcid":"https://orcid.org/0000-0001-7005-2920"},"institutions":[{"id":"https://openalex.org/I4210123934","display_name":"Amazon (United Kingdom)","ror":"https://ror.org/02xey9634","country_code":"GB","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210123934"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Marius Cotescu","raw_affiliation_strings":["Amazon Research, Cambridge, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Research, Cambridge, United Kingdom","institution_ids":["https://openalex.org/I4210123934"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4152,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.56485555,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"2348","last_page":"2352"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7310593128204346},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6418508291244507},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5224334597587585},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.500648021697998},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40953877568244934}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7310593128204346},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6418508291244507},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5224334597587585},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.500648021697998},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40953877568244934},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-75","is_oa":true,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-75","pdf_url":"https://www.isca-archive.org/interspeech_2022/deja22_interspeech.pdf","source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.21437/interspeech.2022-75","is_oa":true,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-75","pdf_url":"https://www.isca-archive.org/interspeech_2022/deja22_interspeech.pdf","source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4283817920.pdf","grobid_xml":"https://content.openalex.org/works/W4283817920.grobid-xml"},"referenced_works_count":36,"referenced_works":["https://openalex.org/W1492383498","https://openalex.org/W1540596182","https://openalex.org/W2078169166","https://openalex.org/W2107860279","https://openalex.org/W2557915412","https://openalex.org/W2612434969","https://openalex.org/W2619368999","https://openalex.org/W2808631503","https://openalex.org/W2888968865","https://openalex.org/W2903739847","https://openalex.org/W2922332774","https://openalex.org/W2946200149","https://openalex.org/W2962788625","https://openalex.org/W2962959915","https://openalex.org/W2963035245","https://openalex.org/W2963386851","https://openalex.org/W2963609956","https://openalex.org/W2972394484","https://openalex.org/W2987307811","https://openalex.org/W3022876224","https://openalex.org/W3030437843","https://openalex.org/W3097627357","https://openalex.org/W3097934054","https://openalex.org/W3098557217","https://openalex.org/W3131643166","https://openalex.org/W3137249133","https://openalex.org/W3161492781","https://openalex.org/W3162770051","https://openalex.org/W3184815887","https://openalex.org/W3197113339","https://openalex.org/W4235132546","https://openalex.org/W4283837596","https://openalex.org/W4289383906","https://openalex.org/W4289761690","https://openalex.org/W4294619240","https://openalex.org/W4298857617"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"We":[0],"introduce":[1],"a":[2,22,54,62,72,143],"new":[3,50,58,63],"automatic":[4,100],"evaluation":[5],"method":[6,101],"for":[7,102],"speaker":[8,37,69,105,115,124,147,152],"similarity":[9,148],"assessment,":[10],"that":[11,107,139],"is":[12,30,90],"consistent":[13],"with":[14,130,154],"human":[15],"perceptual":[16,93],"scores.Modern":[17],"neural":[18],"text-to-speech":[19],"models":[20,38,48,126],"require":[21],"vast":[23],"amount":[24],"of":[25,57,71,79,104],"clean":[26],"training":[27],"data,":[28],"which":[29],"why":[31],"many":[32,45],"solutions":[33,40],"switch":[34],"from":[35,44,151],"single":[36],"to":[39,86,145,161],"trained":[41],"on":[42,114],"examples":[43],"different":[46,121],"speakers.Multi-speaker":[47],"bring":[49],"possibilities,":[51],"such":[52],"as":[53],"faster":[55],"creation":[56],"voices,":[59],"but":[60],"also":[61],"problem":[64],"-speaker":[65],"leakage,":[66],"where":[67],"the":[68,80,83,111,166],"identity":[70],"synthesized":[73],"example":[74],"might":[75],"not":[76],"match":[77],"those":[78],"target":[81],"speaker.Currently,":[82],"only":[84],"way":[85],"discover":[87],"this":[88,95],"issue":[89],"through":[91],"costly":[92],"evaluations.In":[94],"work,":[96],"we":[97,109,140],"propose":[98],"an":[99],"assessment":[103],"similarity.For":[106],"purpose,":[108],"extend":[110],"recent":[112],"work":[113],"verification":[116],"systems":[117],"and":[118,123,133,157],"evaluate":[119],"how":[120],"metrics":[122],"embeddings":[125,153],"reflect":[127],"Multiple":[128],"Stimuli":[129],"Hidden":[131],"Reference":[132],"Anchor":[134],"(MUSHRA)":[135],"scores.Our":[136],"experiments":[137],"show":[138],"can":[141],"train":[142],"model":[144],"predict":[146],"MUSHRA":[149],"scores":[150],"0.96":[155],"accuracy":[156],"significant":[158],"correlation":[159],"up":[160],"0.78":[162],"Pearson":[163],"score":[164],"at":[165],"utterance":[167],"level.":[168]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-19T15:47:20.252518","created_date":"2025-10-10T00:00:00"}
