{"id":"https://openalex.org/W3159980879","doi":"https://doi.org/10.1109/icassp39728.2021.9414510","title":"Efficient Use of End-to-End Data in Spoken Language Processing","display_name":"Efficient Use of End-to-End Data in Spoken Language Processing","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3159980879","doi":"https://doi.org/10.1109/icassp39728.2021.9414510","mag":"3159980879"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9414510","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9414510","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009680990","display_name":"Yiting Lu","orcid":"https://orcid.org/0000-0002-7615-4167"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Yiting Lu","raw_affiliation_strings":["University of Cambridge,ALTA Institute,Engineering Department,UK"],"affiliations":[{"raw_affiliation_string":"University of Cambridge,ALTA Institute,Engineering Department,UK","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100445125","display_name":"Yu Wang","orcid":"https://orcid.org/0000-0001-9500-081X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Wang","raw_affiliation_strings":["Shanghai Jiao Tong University,Shanghai Key Lab. of Digital Media Processing &#x0026; Transmissions,China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Shanghai Key Lab. of Digital Media Processing &#x0026; Transmissions,China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050766679","display_name":"Mark Gales","orcid":"https://orcid.org/0000-0002-5311-8219"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mark J.F. Gales","raw_affiliation_strings":["University of Cambridge,ALTA Institute,Engineering Department,UK"],"affiliations":[{"raw_affiliation_string":"University of Cambridge,ALTA Institute,Engineering Department,UK","institution_ids":["https://openalex.org/I241749"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5009680990"],"corresponding_institution_ids":["https://openalex.org/I241749"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05553866,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7518","last_page":"7522"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8421235084533691},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6834279298782349},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken language","score":0.6320400238037109},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.6092090606689453},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5421285629272461},{"id":"https://openalex.org/keywords/speech-translation","display_name":"Speech translation","score":0.5343486666679382},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5131431221961975},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5008127689361572},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4836805760860443},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4463627338409424},{"id":"https://openalex.org/keywords/terminal","display_name":"Terminal (telecommunication)","score":0.44601279497146606},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4191523790359497},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4171915054321289},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.41293075680732727},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3964233994483948},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.35488972067832947}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8421235084533691},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6834279298782349},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.6320400238037109},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.6092090606689453},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5421285629272461},{"id":"https://openalex.org/C2780366754","wikidata":"https://www.wikidata.org/wiki/Q7494857","display_name":"Speech translation","level":3,"score":0.5343486666679382},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5131431221961975},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5008127689361572},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4836805760860443},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4463627338409424},{"id":"https://openalex.org/C2779664074","wikidata":"https://www.wikidata.org/wiki/Q3518405","display_name":"Terminal (telecommunication)","level":2,"score":0.44601279497146606},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4191523790359497},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4171915054321289},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.41293075680732727},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3964233994483948},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.35488972067832947},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9414510","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9414510","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7099999785423279,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W53604701","https://openalex.org/W1522301498","https://openalex.org/W1556750318","https://openalex.org/W2059637352","https://openalex.org/W2115340555","https://openalex.org/W2124807415","https://openalex.org/W2252239724","https://openalex.org/W2327501763","https://openalex.org/W2525778437","https://openalex.org/W2582956876","https://openalex.org/W2760656271","https://openalex.org/W2799473636","https://openalex.org/W2888867175","https://openalex.org/W2903193068","https://openalex.org/W2936774411","https://openalex.org/W2936969148","https://openalex.org/W2945700568","https://openalex.org/W2949328740","https://openalex.org/W2951635603","https://openalex.org/W2962784628","https://openalex.org/W2963403868","https://openalex.org/W2963779652","https://openalex.org/W2963834942","https://openalex.org/W2964121744","https://openalex.org/W2964161387","https://openalex.org/W2997436923","https://openalex.org/W3006988520","https://openalex.org/W3016137827","https://openalex.org/W3034332156","https://openalex.org/W3034625919","https://openalex.org/W3037542581","https://openalex.org/W3038092046","https://openalex.org/W3101648800","https://openalex.org/W4300882364","https://openalex.org/W4385245566","https://openalex.org/W6602138409","https://openalex.org/W6631190155","https://openalex.org/W6638575559","https://openalex.org/W6691444175","https://openalex.org/W6739901393","https://openalex.org/W6748598161","https://openalex.org/W6776030248","https://openalex.org/W6845923349"],"related_works":["https://openalex.org/W2990025607","https://openalex.org/W3045103338","https://openalex.org/W3007142233","https://openalex.org/W2404510748","https://openalex.org/W2916997151","https://openalex.org/W3089218859","https://openalex.org/W2949174760","https://openalex.org/W4399356803","https://openalex.org/W3177132412","https://openalex.org/W3198731777"],"abstract_inverted_index":{"For":[0],"many":[1],"challenging":[2],"tasks":[3,26],"there":[4],"is":[5,98,119,133,143,166,182],"often":[6],"limited":[7,83,196],"data":[8,39,97,132,142,155],"to":[9,67,91,99,156,168],"train":[10],"the":[11,69,76,93,96,103,122,140,154,161,173],"systems":[12],"in":[13],"an":[14],"end-to-end":[15,84,131],"fashion,":[16],"which":[17],"has":[18],"become":[19],"increasingly":[20],"popular":[21],"for":[22,86,195],"deep-learning.":[23],"However,":[24],"these":[25],"can":[27,79],"normally":[28],"be":[29,80,157],"split":[30],"into":[31,49],"multiple":[32,63],"separate":[33],"modules,":[34],"with":[35,41,56,163],"significant":[36,127],"quantities":[37,128],"of":[38,82,95,129,153,160,178],"associated":[40],"each":[42],"module.":[43],"Spoken":[44],"language":[45,124],"processing":[46],"applications":[47],"fit":[48],"this":[50,115],"scenario,":[51],"as":[52,121],"they":[53],"usually":[54],"start":[55],"a":[57],"speech":[58,117],"recognition":[59],"module,":[60],"followed":[61],"by":[62],"task":[64],"specific":[65],"modules":[66,104,149,181],"achieve":[68],"end":[70],"goal.":[71],"This":[72],"work":[73,116],"examines":[74],"how":[75,167],"best":[77],"use":[78,94,152],"made":[81],"training":[85],"sequence-to-sequence":[87],"tasks.":[88],"The":[89],"key":[90],"improving":[92],"more":[100],"tightly":[101],"integrate":[102],"via":[105],"embeddings,":[106],"rather":[107],"than":[108],"simply":[109],"propagating":[110],"words":[111],"between":[112,148,172,180],"modules.":[113,174],"In":[114],"translation":[118],"considered":[120],"spoken":[123],"application.":[125],"When":[126,139],"in-domain,":[130],"available,":[134],"cascade":[135,190],"approaches":[136,194],"operate":[137],"well.":[138],"in-domain":[141,197],"limited,":[144],"how-ever,":[145],"tighter":[146,164],"integration":[147,165],"enables":[150],"better":[151],"made.":[158],"One":[159],"challenges":[162],"ensure":[169],"embedding":[170],"consistency":[171],"A":[175],"novel":[176],"form":[177],"embedding-passing":[179,193],"proposed":[183],"that":[184],"shows":[185],"improved":[186],"performance":[187],"over":[188],"both":[189],"and":[191],"standard":[192],"data.":[198]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
