{"id":"https://openalex.org/W2751179137","doi":"https://doi.org/10.1147/jrd.2017.2701207","title":"Recent progress in deep end-to-end models for spoken language processing","display_name":"Recent progress in deep end-to-end models for spoken language processing","publication_year":2017,"publication_date":"2017-07-01","ids":{"openalex":"https://openalex.org/W2751179137","doi":"https://doi.org/10.1147/jrd.2017.2701207","mag":"2751179137"},"language":"en","primary_location":{"id":"doi:10.1147/jrd.2017.2701207","is_oa":false,"landing_page_url":"https://doi.org/10.1147/jrd.2017.2701207","pdf_url":null,"source":{"id":"https://openalex.org/S4210219925","display_name":"IBM Journal of Research and Development","issn_l":"0018-8646","issn":["0018-8646","2151-8556"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320652","host_organization_name":"IBM","host_organization_lineage":["https://openalex.org/P4310320652"],"host_organization_lineage_names":["IBM"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IBM Journal of Research and Development","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015927589","display_name":"Kartik Audhkhasi","orcid":"https://orcid.org/0000-0002-2340-1144"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"K. Audhkhasi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024913801","display_name":"Andrew E. Rosenberg","orcid":"https://orcid.org/0000-0002-4536-8824"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"A. Rosenberg","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079994647","display_name":"George Saon","orcid":"https://orcid.org/0009-0004-6837-5009"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"G. Saon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088302697","display_name":"Abhinav Sethy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"A. Sethy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071715737","display_name":"Bhuvana Ramabhadran","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"B. Ramabhadran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055627037","display_name":"Shengyong Chen","orcid":"https://orcid.org/0000-0002-6705-3831"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"S. Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5034529775","display_name":"Michael Picheny","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"M. Picheny","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5015927589"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.7801,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.79467588,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"61","issue":"4/5","first_page":"2:1","last_page":"2:10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7872207164764404},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.706962525844574},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.6611853241920471},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.582593560218811},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5786499977111816},{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.5777138471603394},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5700685977935791},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5242823958396912},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.510732889175415},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken language","score":0.49994659423828125},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.4843684136867523},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4365490972995758},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.413654088973999},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3906469941139221}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7872207164764404},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.706962525844574},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.6611853241920471},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.582593560218811},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5786499977111816},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.5777138471603394},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5700685977935791},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5242823958396912},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.510732889175415},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.49994659423828125},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.4843684136867523},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4365490972995758},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.413654088973999},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3906469941139221},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1147/jrd.2017.2701207","is_oa":false,"landing_page_url":"https://doi.org/10.1147/jrd.2017.2701207","pdf_url":null,"source":{"id":"https://openalex.org/S4210219925","display_name":"IBM Journal of Research and Development","issn_l":"0018-8646","issn":["0018-8646","2151-8556"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320652","host_organization_name":"IBM","host_organization_lineage":["https://openalex.org/P4310320652"],"host_organization_lineage_names":["IBM"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IBM Journal of Research and Development","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7400000095367432,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G2531681599","display_name":null,"funder_award_id":"W911NF-12-C-0012","funder_id":"https://openalex.org/F4320338295","funder_display_name":"Army Research Laboratory"}],"funders":[{"id":"https://openalex.org/F4320306078","display_name":"U.S. Department of Defense","ror":"https://ror.org/0447fe631"},{"id":"https://openalex.org/F4320333051","display_name":"Intelligence Advanced Research Projects Activity","ror":"https://ror.org/01v3fsc55"},{"id":"https://openalex.org/F4320338295","display_name":"Army Research Laboratory","ror":"https://ror.org/011hc8f90"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1922655562","https://openalex.org/W2064675550","https://openalex.org/W2102113734","https://openalex.org/W2127141656","https://openalex.org/W2133564696","https://openalex.org/W2155117693","https://openalex.org/W2157331557","https://openalex.org/W2160815625","https://openalex.org/W2293858598","https://openalex.org/W2327501763","https://openalex.org/W2403195671","https://openalex.org/W2962826786","https://openalex.org/W2963211739","https://openalex.org/W2963920996","https://openalex.org/W2964199361","https://openalex.org/W2964308564","https://openalex.org/W6629052376","https://openalex.org/W6640090968","https://openalex.org/W6675365184","https://openalex.org/W6679434410","https://openalex.org/W6713280142"],"related_works":["https://openalex.org/W4205841273","https://openalex.org/W4205525690","https://openalex.org/W1761388607","https://openalex.org/W1997922073","https://openalex.org/W2604685715","https://openalex.org/W2412160900","https://openalex.org/W2136453575","https://openalex.org/W2916997151","https://openalex.org/W2096389830","https://openalex.org/W1499383998"],"abstract_inverted_index":{"End-to-end":[0],"models":[1,85,99,163,179],"(or":[2],"sequence-to-sequence":[3],"models)":[4],"based":[5,113],"on":[6,83,94,114,127],"deep":[7,40],"neural":[8,41,50,110],"networks":[9,111],"have":[10],"recently":[11],"become":[12],"popular":[13],"within":[14,77],"the":[15,33,54,61,78,142,166],"machine":[16],"learning":[17],"community.":[18],"These":[19],"techniques":[20],"are":[21],"also":[22,152,172],"increasingly":[23],"used":[24],"in":[25,176],"automatic":[26],"speech":[27],"recognition":[28],"as":[29,131],"an":[30],"alternative":[31],"to":[32,101],"state-of-the-art,":[34],"hybrid":[35,169],"HMM-DNN":[36,168],"(hidden":[37],"Markov":[38],"model,":[39],"network)":[42],"system.":[43],"The":[44],"end-to-end":[45,84,98],"systems":[46],"contain":[47],"a":[48,154],"purely":[49],"architecture":[51],"that":[52],"eliminates":[53],"need":[55],"of":[56,97,157,161],"any":[57],"time":[58],"alignment":[59],"between":[60],"input":[62],"acoustic":[63],"feature":[64],"vector":[65],"sequence":[66],"and":[67,103,119,139,171],"output":[68],"phone":[69],"sequence.":[70],"In":[71],"this":[72],"paper,":[73],"we":[74],"present":[75,91,125,153],"progress":[76],"IBM":[79],"Watson":[80],"Multimodal":[81],"Group":[82],"for":[86,180],"spoken":[87,181],"language":[88,182],"processing.":[89,183],"We":[90,124,151],"our":[92],"work":[93],"two":[95],"types":[96],"applied":[100],"speech-to-text":[102],"keyword":[104],"search":[105],"tasks,":[106],"namely,":[107],"1)":[108],"recurrent":[109],"(RNNs)":[112],"connectionist":[115],"temporal":[116],"classification":[117],"loss,":[118],"2)":[120],"attention-based":[121],"encoder\u2013decoder":[122],"RNNs.":[123],"results":[126],"several":[128],"languages":[129],"(such":[130],"Pashto,":[132],"Mongolian,":[133],"Javanese,":[134],"Amharic,":[135],"Guarani,":[136],"Dholuo,":[137],"Igbo,":[138],"Georgian)":[140],"from":[141],"Intelligence":[143],"Advanced":[144],"Research":[145],"Projects":[146],"Activity":[147],"funded":[148],"Babel":[149],"Program.":[150],"detailed":[155],"analysis":[156],"some":[158],"salient":[159],"characteristics":[160],"these":[162],"compared":[164],"with":[165],"state-of-the-art":[167],"systems,":[170],"discuss":[173],"future":[174],"challenges":[175],"using":[177],"such":[178]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
