{"id":"https://openalex.org/W4225890157","doi":"https://doi.org/10.1109/icassp43922.2022.9746964","title":"ASR-Aware End-to-End Neural Diarization","display_name":"ASR-Aware End-to-End Neural Diarization","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4225890157","doi":"https://doi.org/10.1109/icassp43922.2022.9746964"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746964","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746964","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2202.01286","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013650472","display_name":"Aparna Khare","orcid":"https://orcid.org/0000-0001-7151-3055"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Aparna Khare","raw_affiliation_strings":["Amazon Alexa AI,Sunnyvale,CA","Amazon Alexa AI, Sunnyvale, CA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,Sunnyvale,CA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, Sunnyvale, CA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005681147","display_name":"Eun\u2010Jung Han","orcid":"https://orcid.org/0000-0002-8385-3775"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eunjung Han","raw_affiliation_strings":["Amazon Alexa AI,Sunnyvale,CA","Amazon Alexa AI, Sunnyvale, CA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,Sunnyvale,CA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, Sunnyvale, CA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100402382","display_name":"Yuguang Yang","orcid":"https://orcid.org/0000-0003-1353-1815"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuguang Yang","raw_affiliation_strings":["Amazon Alexa AI,Sunnyvale,CA","Amazon Alexa AI, Sunnyvale, CA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,Sunnyvale,CA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, Sunnyvale, CA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114060825","display_name":"Andreas Stolcke","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andreas Stolcke","raw_affiliation_strings":["Amazon Alexa AI,Sunnyvale,CA","Amazon Alexa AI, Sunnyvale, CA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,Sunnyvale,CA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, Sunnyvale, CA","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5013650472"],"corresponding_institution_ids":["https://openalex.org/I1311688040"],"apc_list":null,"apc_paid":null,"fwci":0.8399,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.73516006,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"8092","last_page":"8096"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.829335629940033},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.7375519275665283},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7333994507789612},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.633731484413147},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6175402998924255},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5727771520614624},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4953095018863678},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.49009689688682556},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47533702850341797},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.42808935046195984},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4155115485191345},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06031414866447449}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.829335629940033},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.7375519275665283},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7333994507789612},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.633731484413147},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6175402998924255},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5727771520614624},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4953095018863678},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.49009689688682556},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47533702850341797},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.42808935046195984},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4155115485191345},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06031414866447449},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746964","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746964","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2202.01286","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2202.01286","pdf_url":"https://arxiv.org/pdf/2202.01286","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2202.01286","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2202.01286","pdf_url":"https://arxiv.org/pdf/2202.01286","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.550000011920929,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1524333225","https://openalex.org/W2098726438","https://openalex.org/W2171631590","https://openalex.org/W2316138215","https://openalex.org/W2507319753","https://openalex.org/W2891176389","https://openalex.org/W2896538040","https://openalex.org/W2900212944","https://openalex.org/W2902864383","https://openalex.org/W2936774411","https://openalex.org/W2939690918","https://openalex.org/W2962760690","https://openalex.org/W2963227667","https://openalex.org/W2978017171","https://openalex.org/W2993724474","https://openalex.org/W3008357631","https://openalex.org/W3015737406","https://openalex.org/W3021527274","https://openalex.org/W3035299099","https://openalex.org/W3095212884","https://openalex.org/W3162770427","https://openalex.org/W3163139004","https://openalex.org/W3197905752","https://openalex.org/W3197916665","https://openalex.org/W4248634141","https://openalex.org/W4385245566","https://openalex.org/W6631362777","https://openalex.org/W6739901393"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W2162158162","https://openalex.org/W4247736853","https://openalex.org/W1493012537","https://openalex.org/W1999004162","https://openalex.org/W2175373321","https://openalex.org/W2125642021","https://openalex.org/W1521049138","https://openalex.org/W2938358845","https://openalex.org/W1556857061"],"abstract_inverted_index":{"We":[0],"present":[1],"a":[2,45,54,86],"Conformer-based":[3,66],"end-to-end":[4],"neural":[5],"diarization":[6,121,151],"(EEND)":[7],"model":[8,57,110],"that":[9,93,134],"uses":[10],"both":[11],"acoustic":[12,81],"input":[13],"and":[14,38,41],"features":[15,27,30,42,77,96,118],"derived":[16,31,43],"from":[17,33,44],"an":[18],"automatic":[19],"speech":[20],"recognition":[21],"(ASR)":[22],"model.":[23],"Two":[24],"categories":[25],"of":[26,129,145],"are":[28,69,78],"explored:":[29],"directly":[32],"ASR":[34,60,76,95,117,147],"output":[35],"(phones,":[36],"position-in-word":[37,138],"word":[39],"boundaries)":[40],"lexical":[46],"speaker":[47,100],"change":[48],"detection":[49],"model,":[50],"trained":[51],"by":[52,155],"finetuning":[53],"pretrained":[55],"BERT":[56],"on":[58,124],"the":[59,65,73,109,116,125,141,150,159],"output.":[61],"Three":[62],"modifications":[63],"to":[64,71,97,107,111,158],"EEND":[67],"architecture":[68],"proposed":[70],"incorporate":[72],"features.":[74,82],"First,":[75],"concatenated":[79],"with":[80,120,137],"Second,":[83],"we":[84],"propose":[85],"new":[87],"attention":[88],"mechanism":[89],"called":[90],"contextualized":[91],"self-attention":[92],"utilizes":[94],"build":[98],"robust":[99],"representations.":[101],"Finally,":[102],"multi-task":[103,135],"learning":[104,136],"is":[105,140],"used":[106],"train":[108],"minimize":[112],"classification":[113],"loss":[114],"for":[115],"along":[119],"loss.":[122],"Experiments":[123],"two-speaker":[126],"English":[127],"conversations":[128],"Switchboard+SRE":[130],"data":[131],"sets":[132],"show":[133],"information":[139],"most":[142],"effective":[143],"way":[144],"utilizing":[146],"features,":[148],"reducing":[149],"error":[152],"rate":[153],"(DER)":[154],"20%":[156],"relative":[157],"baseline.":[160]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
