{"id":"https://openalex.org/W3138431698","doi":"https://doi.org/10.1109/icassp39728.2021.9414313","title":"Top-Down Attention in End-to-End Spoken Language Understanding","display_name":"Top-Down Attention in End-to-End Spoken Language Understanding","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3138431698","doi":"https://doi.org/10.1109/icassp39728.2021.9414313","mag":"3138431698"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9414313","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9414313","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100393448","display_name":"Yixin Chen","orcid":"https://orcid.org/0000-0002-8176-0241"},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yixin Chen","raw_affiliation_strings":["University of California,Department of Statistics,Los Angeles (UCLA)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California,Department of Statistics,Los Angeles (UCLA)","institution_ids":["https://openalex.org/I161318765"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056785027","display_name":"Weiyi Lu","orcid":"https://orcid.org/0000-0003-1069-3480"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weiyi Lu","raw_affiliation_strings":["Amazon Alexa,Seattle,USA","Amazon Alexa, Seattle, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Alexa,Seattle,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa, Seattle, USA","institution_ids":["https://openalex.org/I1311688040","https://openalex.org/I58610484"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012793114","display_name":"Alejandro Mottini","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alejandro Mottini","raw_affiliation_strings":["Amazon Alexa,Seattle,USA","Amazon Alexa, Seattle, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Alexa,Seattle,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa, Seattle, USA","institution_ids":["https://openalex.org/I1311688040","https://openalex.org/I58610484"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053710007","display_name":"Li Erran Li","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Li Erran Li","raw_affiliation_strings":["Amazon Alexa,Seattle,USA","Amazon Alexa, Seattle, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Alexa,Seattle,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa, Seattle, USA","institution_ids":["https://openalex.org/I1311688040","https://openalex.org/I58610484"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012153296","display_name":"Jasha Droppo","orcid":"https://orcid.org/0000-0001-6097-0090"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jasha Droppo","raw_affiliation_strings":["Amazon Alexa,Seattle,USA","Amazon Alexa, Seattle, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Alexa,Seattle,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa, Seattle, USA","institution_ids":["https://openalex.org/I1311688040","https://openalex.org/I58610484"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066223118","display_name":"Zheng Du","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zheng Du","raw_affiliation_strings":["Amazon Alexa,Seattle,USA","Amazon Alexa, Seattle, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Alexa,Seattle,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa, Seattle, USA","institution_ids":["https://openalex.org/I1311688040","https://openalex.org/I58610484"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082750097","display_name":"Belinda Zeng","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Belinda Zeng","raw_affiliation_strings":["Amazon Alexa,Seattle,USA","Amazon Alexa, Seattle, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Amazon Alexa,Seattle,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa, Seattle, USA","institution_ids":["https://openalex.org/I1311688040","https://openalex.org/I58610484"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.7762,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.72889147,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"6199","last_page":"6203"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.8371127843856812},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6977746486663818},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken language","score":0.6493280529975891},{"id":"https://openalex.org/keywords/end-user-development","display_name":"End-user development","score":0.4847695231437683},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3392980694770813},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3289461135864258},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3160271942615509},{"id":"https://openalex.org/keywords/end-user","display_name":"End user","score":0.3147624135017395},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2639439105987549},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.14410921931266785}],"concepts":[{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.8371127843856812},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6977746486663818},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.6493280529975891},{"id":"https://openalex.org/C2776867947","wikidata":"https://www.wikidata.org/wiki/Q500467","display_name":"End-user development","level":3,"score":0.4847695231437683},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3392980694770813},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3289461135864258},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3160271942615509},{"id":"https://openalex.org/C91262260","wikidata":"https://www.wikidata.org/wiki/Q528074","display_name":"End user","level":2,"score":0.3147624135017395},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2639439105987549},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.14410921931266785},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9414313","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9414313","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.800000011920929,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W648947103","https://openalex.org/W1494198834","https://openalex.org/W1649407914","https://openalex.org/W2042346717","https://openalex.org/W2160747344","https://openalex.org/W2575842049","https://openalex.org/W2745461083","https://openalex.org/W2894164357","https://openalex.org/W2896457183","https://openalex.org/W2963266252","https://openalex.org/W2963288440","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2972584841","https://openalex.org/W2972818416","https://openalex.org/W3007328579","https://openalex.org/W3034266838","https://openalex.org/W3035515490","https://openalex.org/W3049038774","https://openalex.org/W3095189764","https://openalex.org/W3095552229","https://openalex.org/W3097964672","https://openalex.org/W4287748764","https://openalex.org/W4385245566","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6780220573"],"related_works":["https://openalex.org/W4299590256","https://openalex.org/W2919182614","https://openalex.org/W3163634122","https://openalex.org/W2113114423","https://openalex.org/W4287122814","https://openalex.org/W2167239121","https://openalex.org/W4391709660","https://openalex.org/W2529145526","https://openalex.org/W2884814595","https://openalex.org/W4327633437"],"abstract_inverted_index":{"Spoken":[0],"language":[1],"understanding":[2],"(SLU)":[3],"is":[4,80,100,192],"the":[5,9,65,70,85,89,180,210,219],"task":[6],"of":[7,11,23,64,69,77,88,171,201],"inferring":[8],"semantics":[10],"spoken":[12],"utterances.":[13],"Traditionally,":[14],"this":[15,78,135],"has":[16],"been":[17],"achieved":[18],"with":[19,161],"a":[20,42,61,142,168,185],"cascading":[21],"combination":[22],"Automatic":[24],"Speech":[25],"Recognition":[26],"(ASR)":[27],"and":[28,91,111,126,152,184,203,207,213],"Natural":[29],"Language":[30],"Understanding":[31],"(NLU)":[32],"modules":[33],"that":[34,102,148,209],"are":[35],"optimized":[36],"separately,":[37],"which":[38,165],"can":[39,120,217],"lead":[40],"to":[41,54,82,94,129,156,167,194],"suboptimal":[43],"overall":[44],"performance.":[45,97,221],"More":[46],"recently,":[47],"End-to-End":[48],"SLU":[49,56,72,140,146],"(E2E":[50],"SLU)":[51],"was":[52],"proposed":[53],"perform":[55],"directly":[57],"from":[58],"speech":[59],"through":[60],"joint":[62],"optimization":[63,170],"modules,":[66],"addressing":[67],"some":[68],"traditional":[71],"shortcomings.":[73],"A":[74],"key":[75],"challenge":[76],"approach":[79],"how":[81],"best":[83],"integrate":[84],"feature":[86],"learning":[87],"ASR":[90,105,118,163,202],"NLU":[92,112,159,204],"sub-tasks":[93],"maximize":[95],"their":[96,131],"While":[98],"it":[99],"known":[101],"in":[103,199],"general,":[104],"models":[106,113,119],"focus":[107],"on":[108,134],"low-level":[109,162],"features,":[110,164],"need":[114],"higher-level":[115],"contextual":[116],"information,":[117],"nonetheless":[121],"also":[122],"leverage":[123],"top-down":[124,150],"syntactic":[125,212],"semantic":[127,214],"information":[128,216],"improve":[130,218],"recognition.":[132],"Based":[133],"insight,":[136],"we":[137],"propose":[138],"Top-Down":[139],"(TD-SLU),":[141],"new":[143],"transformer-based":[144],"E2E":[145],"model":[147,178],"uses":[149],"attention":[151,154],"an":[153],"gate":[155],"fuse":[157],"high-level":[158,215],"features":[160],"leads":[166],"better":[169],"both":[172,198],"tasks.":[173],"We":[174],"have":[175],"validated":[176],"our":[177],"using":[179],"public":[181],"FluentSpeech":[182],"set,":[183],"large":[186],"custom":[187],"dataset.":[188],"Results":[189],"show":[190],"TD-SLU":[191],"able":[193],"outperform":[195],"selected":[196],"baselines":[197],"terms":[200],"quality":[205],"metrics,":[206],"suggest":[208],"added":[211],"model\u2019s":[220]},"counts_by_year":[{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
