{"id":"https://openalex.org/W3004606968","doi":"https://doi.org/10.21437/odyssey.2020-28","title":"An Empirical Analysis of Information Encoded in Disentangled Neural Speaker Representations","display_name":"An Empirical Analysis of Information Encoded in Disentangled Neural Speaker Representations","publication_year":2020,"publication_date":"2020-05-15","ids":{"openalex":"https://openalex.org/W3004606968","doi":"https://doi.org/10.21437/odyssey.2020-28","mag":"3004606968"},"language":"en","primary_location":{"id":"doi:10.21437/odyssey.2020-28","is_oa":false,"landing_page_url":"https://doi.org/10.21437/odyssey.2020-28","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Speaker and Language Recognition Workshop (Odyssey 2020)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2002.03520","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086205666","display_name":"Raghuveer Peri","orcid":"https://orcid.org/0000-0002-1010-065X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Raghuveer Peri","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101654854","display_name":"Haoqi Li","orcid":"https://orcid.org/0009-0007-5191-9999"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haoqi Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008305849","display_name":"Krishna Somandepalli","orcid":"https://orcid.org/0000-0002-2845-1079"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Krishna Somandepalli","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067289130","display_name":"Arindam Jati","orcid":"https://orcid.org/0000-0002-9498-8536"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arindam Jati","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5010028928","display_name":"Shrikanth Narayanan","orcid":"https://orcid.org/0000-0002-1052-6204"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shrikanth Narayanan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5086205666"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.01508972,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"194","last_page":"201"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7682609558105469},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7322858572006226},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6674879193305969},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5514168739318848},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.533397912979126},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.5198505520820618},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.43105554580688477},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.41561394929885864},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.386817067861557}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7682609558105469},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7322858572006226},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6674879193305969},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5514168739318848},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.533397912979126},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.5198505520820618},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.43105554580688477},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.41561394929885864},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.386817067861557},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.21437/odyssey.2020-28","is_oa":false,"landing_page_url":"https://doi.org/10.21437/odyssey.2020-28","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Speaker and Language Recognition Workshop (Odyssey 2020)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2002.03520","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2002.03520","pdf_url":"https://arxiv.org/pdf/2002.03520","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3004606968","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2002.03520.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2002.03520","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2002.03520","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2002.03520","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2002.03520","pdf_url":"https://arxiv.org/pdf/2002.03520","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3004606968.pdf","grobid_xml":"https://content.openalex.org/works/W3004606968.grobid-xml"},"referenced_works_count":32,"referenced_works":["https://openalex.org/W152668300","https://openalex.org/W1486536989","https://openalex.org/W1494198834","https://openalex.org/W2046056978","https://openalex.org/W2107638917","https://openalex.org/W2130633305","https://openalex.org/W2146334809","https://openalex.org/W2150769028","https://openalex.org/W2219249508","https://openalex.org/W2265195717","https://openalex.org/W2341972007","https://openalex.org/W2398971481","https://openalex.org/W2638067502","https://openalex.org/W2659927845","https://openalex.org/W2726515241","https://openalex.org/W2748318213","https://openalex.org/W2748488820","https://openalex.org/W2796485741","https://openalex.org/W2808631503","https://openalex.org/W2886300652","https://openalex.org/W2888955371","https://openalex.org/W2890189553","https://openalex.org/W2890704021","https://openalex.org/W2890964092","https://openalex.org/W2901368553","https://openalex.org/W2962776659","https://openalex.org/W2962788262","https://openalex.org/W2964058423","https://openalex.org/W2972921407","https://openalex.org/W2982817420","https://openalex.org/W3008391559","https://openalex.org/W3030437843"],"related_works":["https://openalex.org/W3030987249","https://openalex.org/W2790216641","https://openalex.org/W3111532638","https://openalex.org/W2332932830","https://openalex.org/W2907262790","https://openalex.org/W3176939825","https://openalex.org/W3046954891","https://openalex.org/W2099797668","https://openalex.org/W3047099449","https://openalex.org/W3198815374","https://openalex.org/W3015678936","https://openalex.org/W1492587309","https://openalex.org/W3198034710","https://openalex.org/W1987592298","https://openalex.org/W2889226978","https://openalex.org/W3014794275","https://openalex.org/W2305940569","https://openalex.org/W129643858","https://openalex.org/W3140429000","https://openalex.org/W2963753449"],"abstract_inverted_index":{"The":[0],"primary":[1],"characteristic":[2],"of":[3,14,22,27,34,78,95,116,138,146,203],"robust":[4],"speaker":[5,19,23,35,66,86,128,166,170,177,195,205],"representations":[6,24,36,67,129],"is":[7,25,105,173],"that":[8,41,54,155,218],"they":[9,140],"are":[10,42,55,118],"invariant":[11],"to":[12,18,31,37,85,98,107,111,143,162],"factors":[13,40,53,81,97,115,164,217],"variability":[15,117],"not":[16,83],"related":[17,84,142],"identity.":[20],"Disentanglement":[21,63],"one":[26],"the":[28,79,96,109,113,121,136,158,181,216,224],"techniques":[29],"used":[30],"improve":[32],"robustness":[33,193],"both":[38],"intrinsic":[39],"acquired":[43,56],"during":[44,57,201],"speech":[45],"production":[46],"(e.g.,":[47,60],"emotion,":[48],"lexical":[49],"content)":[50],"and":[51,131,228],"extrinsic":[52],"signal":[58],"capture":[59,141],"channel,":[61],"noise).":[62],"in":[64,72,89,120,184,194],"neural":[65],"can":[68,219],"be":[69,99,220],"achieved":[70],"either":[71,102],"a":[73,144],"supervised":[74],"fashion":[75,92],"with":[76,130,160],"annotations":[77],"nuisance":[80,163],"(factors":[82],"identity)":[87],"or":[88],"an":[90],"unsupervised":[91,133,225],"without":[93,132],"labels":[94],"removed.":[100],"In":[101,123],"case":[103],"it":[104],"important":[106],"understand":[108],"extent":[110],"which":[112],"various":[114],"entangled":[119],"representations.":[122],"this":[124],"work,":[125],"we":[126,151,212],"examine":[127],"disentanglement":[134,156,226],"for":[135],"amount":[137],"information":[139,159],"suite":[145],"factors.":[147],"Using":[148],"classification":[149],"experiments":[150,179],"provide":[152,213],"empirical":[153],"evidence":[154],"reduces":[157],"respect":[161],"from":[165],"representations,":[167],"while":[168],"retaining":[169],"information.":[171],"This":[172],"further":[174],"validated":[175],"by":[176],"verification":[178,196],"on":[180,209],"VOiCES":[182],"corpus":[183],"several":[185],"challenging":[186],"acoustic":[187],"conditions.":[188],"We":[189],"also":[190],"show":[191],"improved":[192],"tasks":[197],"using":[198,223],"data":[199],"augmentation":[200],"training":[202],"disentangled":[204],"embeddings.":[206],"Finally,":[207],"based":[208],"our":[210],"findings,":[211],"insights":[214],"into":[215],"effectively":[221],"separated":[222],"technique":[227],"discuss":[229],"potential":[230],"future":[231],"directions.":[232]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
