{"id":"https://openalex.org/W2972356804","doi":"https://doi.org/10.21437/interspeech.2019-2985","title":"Sequence-to-Sequence Learning via Attention Transfer for Incremental Speech Recognition","display_name":"Sequence-to-Sequence Learning via Attention Transfer for Incremental Speech Recognition","publication_year":2019,"publication_date":"2019-09-13","ids":{"openalex":"https://openalex.org/W2972356804","doi":"https://doi.org/10.21437/interspeech.2019-2985","mag":"2972356804"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2019-2985","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-2985","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048192694","display_name":"Sashi Novitasari","orcid":"https://orcid.org/0000-0001-7467-5682"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sashi Novitasari","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038296765","display_name":"Andros Tjandra","orcid":"https://orcid.org/0000-0003-1246-5908"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Andros Tjandra","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040108974","display_name":"Sakriani Sakti","orcid":"https://orcid.org/0000-0001-5509-8963"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sakriani Sakti","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5020994673","display_name":"Satoshi Nakamura","orcid":"https://orcid.org/0000-0001-6956-3803"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Satoshi Nakamura","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5048192694"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.5402,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.87498299,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3835","last_page":"3839"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9739000201225281,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.828762412071228},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.7633811235427856},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7448952198028564},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.6567369103431702},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.6560107469558716},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6188440918922424},{"id":"https://openalex.org/keywords/sequence-learning","display_name":"Sequence learning","score":0.5099605321884155},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4755682349205017},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.4544832706451416},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43190357089042664},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4185170531272888},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3770820200443268}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.828762412071228},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.7633811235427856},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7448952198028564},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.6567369103431702},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.6560107469558716},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6188440918922424},{"id":"https://openalex.org/C40506919","wikidata":"https://www.wikidata.org/wiki/Q7452469","display_name":"Sequence learning","level":2,"score":0.5099605321884155},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4755682349205017},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.4544832706451416},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43190357089042664},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4185170531272888},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3770820200443268},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2019-2985","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-2985","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7900000214576721,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2529301793","https://openalex.org/W2384121599","https://openalex.org/W2038083449","https://openalex.org/W3177678247","https://openalex.org/W1999617572","https://openalex.org/W2944572343","https://openalex.org/W2333799855","https://openalex.org/W2351687372","https://openalex.org/W2004087835","https://openalex.org/W2741675028"],"abstract_inverted_index":{"Attention-based":[0],"sequence-to-sequence":[1],"automatic":[2],"speech":[3,33,73,161],"recognition":[4,34,176],"(ASR)":[5],"requires":[6],"a":[7,97,121,124],"significant":[8],"delay":[9],"to":[10,58,84,151,187,191],"recognize":[11],"long":[12],"utterances":[13],"because":[14,54],"the":[15,46,55,60,65,70,86,101,105,108,128,132,147,153,157,164,172,194],"output":[16],"is":[17,42,53,82],"generated":[18],"after":[19],"receiving":[20],"entire":[21],"input":[22,159],"sequences.":[23],"Although":[24],"several":[25],"studies":[26],"recently":[27],"proposed":[28],"sequence":[29],"mechanisms":[30],"for":[31,92],"incremental":[32,61],"(ISR),":[35],"using":[36,120],"different":[37],"frameworks":[38],"and":[39,63,104,141,163],"learning":[40],"algorithms":[41],"more":[43],"complicated":[44],"than":[45],"standard":[47],"ASR":[48,91,99],"model.":[49,110],"One":[50],"main":[51],"reason":[52],"model":[56,103,134],"needs":[57,190],"decide":[59],"steps":[62],"learn":[64],"transcription":[66],"that":[67,169,189],"aligns":[68],"with":[69,136,178],"current":[71,158],"short":[72,160],"segment.":[74],"In":[75],"this":[76],"work,":[77],"we":[78,182],"investigate":[79],"whether":[80],"it":[81],"possible":[83],"employ":[85],"original":[87,129],"architecture":[88,130],"of":[89,119,131,175],"attention-based":[90],"ISR":[93,106],"tasks":[94],"by":[95,170],"treating":[96],"full-utterance":[98],"as":[100,107],"teacher":[102,133],"student":[109,115,148],"We":[111],"design":[112],"an":[113],"alternative":[114],"network":[116,149],"that,":[117],"instead":[118],"thinner":[122],"or":[123],"shallower":[125],"model,":[126],"keeps":[127],"but":[135],"shorter":[137],"sequences":[138],"(few":[139],"encoder":[140],"decoder":[142],"states).":[143],"Using":[144],"attention":[145],"transfer,":[146],"learns":[150],"mimic":[152],"same":[154],"alignment":[155],"between":[156],"segments":[162],"transcription.":[165],"Our":[166],"experiments":[167],"show":[168],"delaying":[171],"starting":[173],"time":[174],"process":[177],"about":[179],"1.7":[180],"sec,":[181],"can":[183],"achieve":[184],"comparable":[185],"performance":[186],"one":[188],"wait":[192],"until":[193],"end.":[195]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
