{"id":"https://openalex.org/W2798949062","doi":"https://doi.org/10.18653/v1/p18-1244","title":"A Purely End-to-End System for Multi-speaker Speech Recognition","display_name":"A Purely End-to-End System for Multi-speaker Speech Recognition","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2798949062","doi":"https://doi.org/10.18653/v1/p18-1244","mag":"2798949062"},"language":"en","primary_location":{"id":"doi:10.18653/v1/p18-1244","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p18-1244","pdf_url":"https://www.aclweb.org/anthology/P18-1244.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/P18-1244.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110334536","display_name":"Hiroshi Seki","orcid":"https://orcid.org/0000-0002-1769-2627"},"institutions":[{"id":"https://openalex.org/I4210133125","display_name":"Mitsubishi Electric (Japan)","ror":"https://ror.org/033y26782","country_code":"JP","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125"]},{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]},{"id":"https://openalex.org/I136259955","display_name":"Toyohashi University of Technology","ror":"https://ror.org/04ezg6d83","country_code":"JP","type":"education","lineage":["https://openalex.org/I136259955"]}],"countries":["JP","US"],"is_corresponding":true,"raw_author_name":"Hiroshi Seki","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL)","Toyohashi University of Technology","Mitsubishi Electric Research USA"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL)","institution_ids":["https://openalex.org/I4210133125"]},{"raw_affiliation_string":"Toyohashi University of Technology","institution_ids":["https://openalex.org/I136259955"]},{"raw_affiliation_string":"Mitsubishi Electric Research USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087554069","display_name":"Takaaki Hori","orcid":"https://orcid.org/0000-0003-4560-8039"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]},{"id":"https://openalex.org/I4210133125","display_name":"Mitsubishi Electric (Japan)","ror":"https://ror.org/033y26782","country_code":"JP","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125"]}],"countries":["JP","US"],"is_corresponding":false,"raw_author_name":"Takaaki Hori","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL)","Mitsubishi Electric Research USA"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL)","institution_ids":["https://openalex.org/I4210133125"]},{"raw_affiliation_string":"Mitsubishi Electric Research USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Johns Hopkins University"],"affiliations":[{"raw_affiliation_string":"Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076453358","display_name":"Jonathan Le Roux","orcid":"https://orcid.org/0000-0002-3451-171X"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]},{"id":"https://openalex.org/I4210133125","display_name":"Mitsubishi Electric (Japan)","ror":"https://ror.org/033y26782","country_code":"JP","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125"]}],"countries":["JP","US"],"is_corresponding":false,"raw_author_name":"Jonathan Le Roux","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL)","Mitsubishi Electric Research USA"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL)","institution_ids":["https://openalex.org/I4210133125"]},{"raw_affiliation_string":"Mitsubishi Electric Research USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112763337","display_name":"John R. Hershey","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]},{"id":"https://openalex.org/I4210133125","display_name":"Mitsubishi Electric (Japan)","ror":"https://ror.org/033y26782","country_code":"JP","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125"]}],"countries":["JP","US"],"is_corresponding":false,"raw_author_name":"John R. Hershey","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL)","Mitsubishi Electric Research USA"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL)","institution_ids":["https://openalex.org/I4210133125"]},{"raw_affiliation_string":"Mitsubishi Electric Research USA","institution_ids":["https://openalex.org/I4210159266"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5110334536"],"corresponding_institution_ids":["https://openalex.org/I136259955","https://openalex.org/I4210133125","https://openalex.org/I4210159266"],"apc_list":null,"apc_paid":null,"fwci":0.8437,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.79866868,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"2620","last_page":"2630"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.8360376358032227},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7377141714096069},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7259238362312317},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.6286616921424866},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5792235136032104},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.532776951789856},{"id":"https://openalex.org/keywords/sequence-labeling","display_name":"Sequence labeling","score":0.4893340468406677},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.47184064984321594},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37378138303756714},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.34835219383239746},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09220030903816223}],"concepts":[{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.8360376358032227},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7377141714096069},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7259238362312317},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.6286616921424866},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5792235136032104},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.532776951789856},{"id":"https://openalex.org/C35639132","wikidata":"https://www.wikidata.org/wiki/Q7452468","display_name":"Sequence labeling","level":3,"score":0.4893340468406677},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.47184064984321594},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37378138303756714},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34835219383239746},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09220030903816223},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/p18-1244","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p18-1244","pdf_url":"https://www.aclweb.org/anthology/P18-1244.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1805.05826","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1805.05826","pdf_url":"https://arxiv.org/pdf/1805.05826","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.1805.05826","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1805.05826","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"mag:2798949062","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"doi:10.18653/v1/p18-1244","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p18-1244","pdf_url":"https://www.aclweb.org/anthology/P18-1244.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4300000071525574,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2798949062.pdf","grobid_xml":"https://content.openalex.org/works/W2798949062.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2963477857","https://openalex.org/W3133801870","https://openalex.org/W3145709712","https://openalex.org/W3037383957","https://openalex.org/W2949788262","https://openalex.org/W142758689","https://openalex.org/W3046954891","https://openalex.org/W2900312178","https://openalex.org/W2737108017","https://openalex.org/W2799930831","https://openalex.org/W3008181812","https://openalex.org/W3014973941","https://openalex.org/W2554781714","https://openalex.org/W3096665264","https://openalex.org/W3026709049","https://openalex.org/W2991480510","https://openalex.org/W3132220150","https://openalex.org/W1494243555","https://openalex.org/W2949526695","https://openalex.org/W2933148111"],"abstract_inverted_index":{"Recently,":[0],"there":[1],"has":[2],"been":[3,24],"growing":[4],"interest":[5],"in":[6,76],"multi-speaker":[7],"speech":[8,66,73,115],"recognition,":[9],"where":[10],"the":[11,89,92,104,131,135],"utterances":[12],"of":[13],"multiple":[14,60,118],"speakers":[15],"are":[16,137],"recognized":[17],"from":[18,63,113],"their":[19],"mixture.":[20],"Promising":[21],"techniques":[22],"have":[23,32],"proposed":[25,132],"for":[26,45],"this":[27,49],"task,":[28],"but":[29],"earlier":[30],"works":[31,145],"required":[33],"additional":[34],"training":[35],"data":[36],"such":[37],"as":[38],"isolated":[39],"source":[40,70],"signals":[41],"or":[42],"senone":[43],"alignments":[44],"effective":[46],"learning.":[47],"In":[48],"paper,":[50],"we":[51],"propose":[52,82],"a":[53,64,83,111,114,127],"new":[54,84],"sequence-to-sequence":[55],"framework":[56],"to":[57,87,95,109,117,126,139],"directly":[58,107],"decode":[59],"label":[61,119],"sequences":[62],"single":[65],"sequence":[67],"by":[68,142],"unifying":[69],"separation":[71,148],"and":[72,149],"recognition":[74,150],"functions":[75],"an":[77],"end-toend":[78],"manner.":[79],"We":[80],"further":[81],"objective":[85],"function":[86],"improve":[88],"contrast":[90],"between":[91],"hidden":[93],"vectors":[94],"avoid":[96],"generating":[97],"similar":[98],"hypotheses.":[99],"Experimental":[100],"results":[101,136],"show":[102],"that":[103],"model":[105,128],"is":[106],"able":[108],"learn":[110],"mapping":[112],"mixture":[116],"sequences,":[120],"achieving":[121],"83.1%":[122],"relative":[123],"improvement":[124],"compared":[125],"trained":[129],"without":[130],"objective.":[133],"Interestingly,":[134],"comparable":[138],"those":[140],"produced":[141],"previous":[143],"endto-end":[144],"featuring":[146],"explicit":[147],"modules.":[151]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
