{"id":"https://openalex.org/W2921879147","doi":"https://doi.org/10.23919/apsipa.2018.8659622","title":"Neural Speech-to-Text Language Models for Rescoring Hypotheses of DNN-HMM Hybrid Automatic Speech Recognition Systems","display_name":"Neural Speech-to-Text Language Models for Rescoring Hypotheses of DNN-HMM Hybrid Automatic Speech Recognition Systems","publication_year":2018,"publication_date":"2018-11-01","ids":{"openalex":"https://openalex.org/W2921879147","doi":"https://doi.org/10.23919/apsipa.2018.8659622","mag":"2921879147"},"language":"en","primary_location":{"id":"doi:10.23919/apsipa.2018.8659622","is_oa":false,"landing_page_url":"https://doi.org/10.23919/apsipa.2018.8659622","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009756622","display_name":"Tomohiro Tanaka","orcid":"https://orcid.org/0000-0001-7442-4912"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Tomohiro Tanaka","raw_affiliation_strings":["NTT Media Intelligence Laboratories, NTT Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"NTT Media Intelligence Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060644399","display_name":"Ryo Masumura","orcid":"https://orcid.org/0000-0002-2415-4149"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ryo Masumura","raw_affiliation_strings":["NTT Media Intelligence Laboratories, NTT Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"NTT Media Intelligence Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087290011","display_name":"Takafumi Moriya","orcid":"https://orcid.org/0000-0003-1942-7250"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takafumi Moriya","raw_affiliation_strings":["NTT Media Intelligence Laboratories, NTT Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"NTT Media Intelligence Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067237053","display_name":"Yushi Aono","orcid":null},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yushi Aono","raw_affiliation_strings":["NTT Media Intelligence Laboratories, NTT Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"NTT Media Intelligence Laboratories, NTT Corporation, Japan","institution_ids":["https://openalex.org/I2251713219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5009756622"],"corresponding_institution_ids":["https://openalex.org/I2251713219"],"apc_list":null,"apc_paid":null,"fwci":1.7917,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.89014227,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"196","last_page":"200"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9785000085830688,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8024778366088867},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7535562515258789},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.7363985776901245},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5001068115234375},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4723607003688812},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4552319645881653},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.362553209066391}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8024778366088867},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7535562515258789},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.7363985776901245},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5001068115234375},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4723607003688812},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4552319645881653},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.362553209066391}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/apsipa.2018.8659622","is_oa":false,"landing_page_url":"https://doi.org/10.23919/apsipa.2018.8659622","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W179875071","https://openalex.org/W854541894","https://openalex.org/W1922655562","https://openalex.org/W1970689298","https://openalex.org/W1999965501","https://openalex.org/W2005708641","https://openalex.org/W2102113734","https://openalex.org/W2107878631","https://openalex.org/W2112739286","https://openalex.org/W2127141656","https://openalex.org/W2130942839","https://openalex.org/W2133564696","https://openalex.org/W2138889249","https://openalex.org/W2143612262","https://openalex.org/W2155273149","https://openalex.org/W2160815625","https://openalex.org/W2289121263","https://openalex.org/W2293185259","https://openalex.org/W2402268235","https://openalex.org/W2405047074","https://openalex.org/W2408727897","https://openalex.org/W2750562505","https://openalex.org/W2758172851","https://openalex.org/W2785994290","https://openalex.org/W2915926444","https://openalex.org/W2962826786","https://openalex.org/W2963211739","https://openalex.org/W2964308564","https://openalex.org/W2998704965","https://openalex.org/W4285719527","https://openalex.org/W6607333740","https://openalex.org/W6623517193","https://openalex.org/W6675365184","https://openalex.org/W6679436768","https://openalex.org/W6680532216","https://openalex.org/W6697157346","https://openalex.org/W6713098461","https://openalex.org/W6713736167","https://openalex.org/W6714019678"],"related_works":["https://openalex.org/W2364370872","https://openalex.org/W2053269318","https://openalex.org/W2136763963","https://openalex.org/W2109705048","https://openalex.org/W2940588515","https://openalex.org/W1909151225","https://openalex.org/W2160030256","https://openalex.org/W1521297879","https://openalex.org/W4253235840","https://openalex.org/W3151937861"],"abstract_inverted_index":{"In":[0,207],"this":[1],"paper,":[2],"we":[3,210],"propose":[4],"to":[5,45,66,136,149,165],"leverage":[6,166],"end-to-end":[7,68,97,139,168,200,229,257],"automatic":[8],"speech":[9,127],"recognition":[10],"(ASR)":[11],"systems":[12,70,99,118,141,170,181,197,202,216,231],"for":[13,152],"assisting":[14],"deep":[15],"neural":[16,76,143,224],"network-hidden":[17],"Markov":[18],"model":[19,38],"(DNN-HMM)":[20],"hybrid":[21,26,159,179,195,214,252],"ASR":[22,27,52,69,98,140,160,169,180,196,201,215,230,246,253,258],"systems.":[23,161],"The":[24],"DNN-HMM":[25,158,178,194,213,251],"system,":[28],"which":[29],"is":[30,43,79,94,115,135,187],"composed":[31,74],"of":[32,75,107],"an":[33],"acoustic":[34,226],"model,":[35,42],"a":[36,40,244],"language":[37,145],"and":[39,105,128,148,219,228,255],"pronunciation":[41],"known":[44,80],"be":[46],"the":[47,55,67,96,122,125,129,138,157,167,173,177,190,193,199,250,256],"most":[48],"practical":[49],"architecture":[50],"in":[51,63,110,156],"field.":[53],"On":[54],"other":[56],"hand,":[57],"much":[58],"attention":[59],"has":[60],"been":[61],"paied":[62],"recent":[64],"studies":[65],"that":[71,81,95,189,238],"are":[72],"fully":[73],"networks.":[77],"It":[78,186],"they":[82],"can":[83,182,242],"yield":[84,243],"comparative":[85],"performance":[86,247],"without":[87],"introducing":[88],"heuristic":[89],"operations.":[90],"However,":[91],"one":[92],"problem":[93],"sometimes":[100],"suffer":[101],"from":[102],"redundant":[103],"generation":[104,112,174],"ommission":[106],"important":[108],"words":[109],"text":[111],"phases.":[113],"This":[114,162],"because":[116,176,198],"these":[117],"cannot":[119],"explicitly":[120],"consider":[121],"connection":[123],"between":[124],"input":[126],"output":[130],"text.":[131],"Therefore,":[132],"our":[133,208,239],"idea":[134],"regard":[137],"as":[142],"speech-to-text":[144],"models":[146,227],"(NS2TLMs)":[147],"use":[150,211],"them":[151],"rescoring":[153],"hypotheses":[154],"generated":[155],"enables":[163],"us":[164],"while":[171],"avoiding":[172],"issues":[175],"generate":[183],"speech-aligned":[184],"hypotheses.":[185],"expected":[188],"NS2TLMs":[191],"improve":[192],"correctly":[203],"handle":[204],"short-duration":[205],"utterances.":[206],"experiments,":[209],"state-of-the-art":[212],"with":[217],"convolutional":[218],"long":[220],"short-term":[221],"memory":[222],"recurrent":[223],"network":[225],"based":[232],"on":[233],"attetional":[234],"encoder-decoder.":[235],"We":[236],"demonstrate":[237],"proposed":[240],"method":[241],"better":[245],"than":[248],"both":[249],"system":[254],"system.":[259]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
