{"id":"https://openalex.org/W3109950060","doi":"https://doi.org/10.1109/taslp.2020.3040626","title":"Auxiliary Networks for Joint Speaker Adaptation and Speaker Change Detection","display_name":"Auxiliary Networks for Joint Speaker Adaptation and Speaker Change Detection","publication_year":2020,"publication_date":"2020-11-25","ids":{"openalex":"https://openalex.org/W3109950060","doi":"https://doi.org/10.1109/taslp.2020.3040626","mag":"3109950060"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2020.3040626","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2020.3040626","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/9289074/09271936.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://ieeexplore.ieee.org/ielx7/6570655/9289074/09271936.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003052411","display_name":"Leda Sar\u0131","orcid":"https://orcid.org/0000-0002-3754-1156"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Leda Sari","raw_affiliation_strings":["Department of Electrical, and Computer Engineering, and Beckman Institute, University of Illinois at Urbana-Champaign, Champaign, IL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical, and Computer Engineering, and Beckman Institute, University of Illinois at Urbana-Champaign, Champaign, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004778663","display_name":"Mark Hasegawa\u2010Johnson","orcid":"https://orcid.org/0000-0002-5631-2893"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mark Hasegawa-Johnson","raw_affiliation_strings":["Department of Electrical, and Computer Engineering, and Beckman Institute, University of Illinois at Urbana-Champaign, Champaign, IL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical, and Computer Engineering, and Beckman Institute, University of Illinois at Urbana-Champaign, Champaign, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101787514","display_name":"Samuel Thomas","orcid":"https://orcid.org/0000-0001-7573-0620"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samuel Thomas","raw_affiliation_strings":["IBM Research AI, IBM T. J. Watson Research Center, New York, NY"],"affiliations":[{"raw_affiliation_string":"IBM Research AI, IBM T. J. Watson Research Center, New York, NY","institution_ids":["https://openalex.org/I1341412227"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5003052411"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":0.9279,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.80974044,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"29","issue":null,"first_page":"324","last_page":"333"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7902606129646301},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7129524946212769},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.7055221796035767},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.5580546259880066},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5369759202003479},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4932639002799988},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.4802184998989105},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.46793124079704285},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4621473252773285},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.4475487172603607},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.43169838190078735},{"id":"https://openalex.org/keywords/change-detection","display_name":"Change detection","score":0.41300541162490845},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3533977270126343},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34350690245628357}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7902606129646301},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7129524946212769},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.7055221796035767},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.5580546259880066},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5369759202003479},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4932639002799988},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.4802184998989105},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.46793124079704285},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4621473252773285},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.4475487172603607},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.43169838190078735},{"id":"https://openalex.org/C203595873","wikidata":"https://www.wikidata.org/wiki/Q25389927","display_name":"Change detection","level":2,"score":0.41300541162490845},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3533977270126343},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34350690245628357},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2020.3040626","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2020.3040626","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/9289074/09271936.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/taslp.2020.3040626","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2020.3040626","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/9289074/09271936.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320316786","display_name":"Center for Cognitive Computing Systems Research","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3109950060.pdf","grobid_xml":"https://content.openalex.org/works/W3109950060.grobid-xml"},"referenced_works_count":58,"referenced_works":["https://openalex.org/W21371201","https://openalex.org/W1482605500","https://openalex.org/W1484181928","https://openalex.org/W1522301498","https://openalex.org/W1524333225","https://openalex.org/W1828163288","https://openalex.org/W1965819578","https://openalex.org/W1993409002","https://openalex.org/W2002342963","https://openalex.org/W2010114458","https://openalex.org/W2048526313","https://openalex.org/W2056738732","https://openalex.org/W2064675550","https://openalex.org/W2079623482","https://openalex.org/W2094147890","https://openalex.org/W2100969003","https://openalex.org/W2112021726","https://openalex.org/W2121981798","https://openalex.org/W2140567543","https://openalex.org/W2146871184","https://openalex.org/W2150769028","https://openalex.org/W2160306971","https://openalex.org/W2165108269","https://openalex.org/W2295119550","https://openalex.org/W2398776621","https://openalex.org/W2407299475","https://openalex.org/W2587150483","https://openalex.org/W2597757402","https://openalex.org/W2673722796","https://openalex.org/W2746241180","https://openalex.org/W2748488820","https://openalex.org/W2796339975","https://openalex.org/W2888776417","https://openalex.org/W2889267317","https://openalex.org/W2889374926","https://openalex.org/W2889418727","https://openalex.org/W2890964092","https://openalex.org/W2936794852","https://openalex.org/W2963266252","https://openalex.org/W2963381607","https://openalex.org/W2963702081","https://openalex.org/W2964121744","https://openalex.org/W2970971581","https://openalex.org/W2972322676","https://openalex.org/W2972621414","https://openalex.org/W2973127116","https://openalex.org/W3006439955","https://openalex.org/W3007550212","https://openalex.org/W3015501067","https://openalex.org/W4295312788","https://openalex.org/W6600930061","https://openalex.org/W6628802092","https://openalex.org/W6628911050","https://openalex.org/W6631190155","https://openalex.org/W6631362777","https://openalex.org/W6638749077","https://openalex.org/W6697090674","https://openalex.org/W6766978945"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W2162158162","https://openalex.org/W4247736853","https://openalex.org/W1493012537","https://openalex.org/W2175373321","https://openalex.org/W2125642021","https://openalex.org/W4310979479","https://openalex.org/W2696990509","https://openalex.org/W1999004162","https://openalex.org/W4280634406"],"abstract_inverted_index":{"Speaker":[0],"adaptation":[1,38,86,113],"and":[2,36,89,141,158],"speaker":[3,26,37,45,77,85,90,95,132,190],"change":[4,27,91,154],"detection":[5,28],"have":[6],"both":[7,59,84],"been":[8],"studied":[9],"extensively":[10],"to":[11,32,57,107,170],"improve":[12],"automatic":[13],"speech":[14],"recognition":[15],"(ASR).":[16],"In":[17,65,134],"many":[18],"cases,":[19],"these":[20],"two":[21],"problems":[22],"are":[23],"investigated":[24],"separately:":[25],"is":[29,39,97,114],"implemented":[30],"first":[31],"obtain":[33],"single-speaker":[34],"regions,":[35],"then":[40,115],"performed":[41,116],"using":[42,99],"the":[43,120,130,142,161,171],"derived":[44],"segments":[46],"for":[47,87],"improved":[48],"ASR.":[49],"However,":[50],"in":[51,61,156,178,200],"an":[52,103],"online":[53],"setting,":[54],"we":[55,68,146],"want":[56],"achieve":[58],"goals":[60],"a":[62,70,76,108,124,137,153],"single":[63],"pass.":[64],"this":[66],"study,":[67],"propose":[69],"neural":[71],"network":[72,105,122,174],"architecture":[73,185],"that":[74,160],"learns":[75],"embedding":[78,96],"from":[79,119],"which":[80],"it":[81],"can":[82],"perform":[83],"ASR":[88,110,112,195],"detection.":[92],"The":[93,183],"proposed":[94,162,184],"computed":[98],"self-attention":[100],"based":[101],"on":[102,136,150],"auxiliary":[104],"attached":[106],"main":[109,121,173],"network.":[111],"by":[117,194],"subtracting,":[118],"activations,":[123],"segment":[125],"dependent":[126],"affine":[127],"transformation":[128],"of":[129],"learned":[131],"embedding.":[133],"experiments":[135],"broadcast":[138],"news":[139],"dataset":[140],"Switchboard":[143],"conversational":[144],"dataset,":[145],"test":[147],"our":[148],"system":[149],"utterances":[151],"with":[152],"point":[155],"them":[157],"show":[159],"method":[163],"achieves":[164],"significantly":[165],"better":[166],"performance":[167],"as":[168],"compared":[169],"unadapted":[172],"(10-14%":[175],"relative":[176,198],"reduction":[177,199],"word":[179],"error":[180],"rate":[181],"(WER)).":[182],"also":[186],"outperforms":[187],"three":[188],"different":[189],"segmentation":[191],"methods":[192],"followed":[193],"(around":[196],"10%":[197],"WER).":[201]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
