{"id":"https://openalex.org/W3161631007","doi":"https://doi.org/10.1109/icassp39728.2021.9414566","title":"DO as I Mean, Not as I Say: Sequence Loss Training for Spoken Language Understanding","display_name":"DO as I Mean, Not as I Say: Sequence Loss Training for Spoken Language Understanding","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3161631007","doi":"https://doi.org/10.1109/icassp39728.2021.9414566","mag":"3161631007"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9414566","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9414566","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031236216","display_name":"Milind Rao","orcid":"https://orcid.org/0000-0003-2649-3205"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Milind Rao","raw_affiliation_strings":["Amazon Alexa, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076504502","display_name":"Pranav Dheram","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pranav Dheram","raw_affiliation_strings":["Amazon Alexa, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003334789","display_name":"Gautam Tiwari","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gautam Tiwari","raw_affiliation_strings":["Amazon Alexa, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109046112","display_name":"Anirudh Raju","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anirudh Raju","raw_affiliation_strings":["Amazon Alexa, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012153296","display_name":"Jasha Droppo","orcid":"https://orcid.org/0000-0001-6097-0090"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jasha Droppo","raw_affiliation_strings":["Amazon Alexa, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110230355","display_name":"Ariya Rastrow","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ariya Rastrow","raw_affiliation_strings":["Amazon Alexa, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060979948","display_name":"Andreas Stolcke","orcid":"https://orcid.org/0000-0002-9925-905X"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andreas Stolcke","raw_affiliation_strings":["Amazon Alexa, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa, USA","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5031236216"],"corresponding_institution_ids":["https://openalex.org/I1311688040"],"apc_list":null,"apc_paid":null,"fwci":1.9036,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.88127199,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8709409236907959},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken language","score":0.6299169063568115},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6270838975906372},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5585858821868896},{"id":"https://openalex.org/keywords/natural-language-understanding","display_name":"Natural language understanding","score":0.5535668134689331},{"id":"https://openalex.org/keywords/proxy","display_name":"Proxy (statistics)","score":0.541091799736023},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5168038606643677},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4825403094291687},{"id":"https://openalex.org/keywords/sequence-labeling","display_name":"Sequence labeling","score":0.4605087637901306},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4471849203109741},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4085180163383484},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4038258194923401},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.22199007868766785},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08224210143089294}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8709409236907959},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.6299169063568115},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6270838975906372},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5585858821868896},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.5535668134689331},{"id":"https://openalex.org/C2780148112","wikidata":"https://www.wikidata.org/wiki/Q1432581","display_name":"Proxy (statistics)","level":2,"score":0.541091799736023},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5168038606643677},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4825403094291687},{"id":"https://openalex.org/C35639132","wikidata":"https://www.wikidata.org/wiki/Q7452468","display_name":"Sequence labeling","level":3,"score":0.4605087637901306},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4471849203109741},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4085180163383484},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4038258194923401},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.22199007868766785},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08224210143089294},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9414566","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9414566","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7200000286102295,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W55328212","https://openalex.org/W1828163288","https://openalex.org/W1989996186","https://openalex.org/W2007261869","https://openalex.org/W2016589492","https://openalex.org/W2119717200","https://openalex.org/W2127141656","https://openalex.org/W2158289097","https://openalex.org/W2296283641","https://openalex.org/W2327501763","https://openalex.org/W2547875792","https://openalex.org/W2786839803","https://openalex.org/W2787045460","https://openalex.org/W2894164357","https://openalex.org/W2914417638","https://openalex.org/W2917128112","https://openalex.org/W2962715022","https://openalex.org/W2962824709","https://openalex.org/W2963288440","https://openalex.org/W2963747784","https://openalex.org/W2968831808","https://openalex.org/W2972584841","https://openalex.org/W2972818416","https://openalex.org/W3005910077","https://openalex.org/W3006901707","https://openalex.org/W3007328579","https://openalex.org/W3017465475","https://openalex.org/W3049038774","https://openalex.org/W3094979069","https://openalex.org/W3095552229","https://openalex.org/W3113320605","https://openalex.org/W3174321708","https://openalex.org/W6602256816","https://openalex.org/W6638749077","https://openalex.org/W6729448088","https://openalex.org/W6748302340","https://openalex.org/W6759393636","https://openalex.org/W6776354695","https://openalex.org/W6786908755"],"related_works":["https://openalex.org/W3174008653","https://openalex.org/W3138431698","https://openalex.org/W2962716343","https://openalex.org/W2765804957","https://openalex.org/W49594164","https://openalex.org/W2367925007","https://openalex.org/W3016262400","https://openalex.org/W4288099861","https://openalex.org/W43702919","https://openalex.org/W2139439365"],"abstract_inverted_index":{"Spoken":[0],"language":[1,46],"understanding":[2,47],"(SLU)":[3],"systems":[4],"extract":[5,31],"transcriptions,":[6],"as":[7,9,83],"well":[8],"semantics":[10,32],"of":[11,22,38,63],"intent":[12],"or":[13,35,67],"named":[14],"entities":[15],"from":[16,33],"speech,":[17],"and":[18,44,89,97,117,126,150],"are":[19,36,50,65],"essential":[20],"components":[21],"voice":[23],"activated":[24],"systems.":[25],"SLU":[26,81,98,115,151],"models,":[27,49],"which":[28],"either":[29],"directly":[30],"audio":[34],"composed":[37],"pipelined":[39],"automatic":[40],"speech":[41],"recognition":[42],"(ASR)":[43],"natural":[45],"(NLU)":[48],"typically":[51],"trained":[52],"via":[53],"differentiable":[54],"cross-entropy":[55],"losses,":[56],"even":[57],"when":[58],"the":[59,91,111,138],"relevant":[60],"performance":[61,128],"metrics":[62,82,129],"interest":[64],"word":[66],"semantic":[68,87,139,156],"error":[69,88],"rates.":[70],"In":[71],"this":[72,101],"work,":[73],"we":[74],"propose":[75],"non-differentiable":[76],"sequence":[77,107,140],"losses":[78],"based":[79],"on":[80,113,130],"a":[84],"proxy":[85],"for":[86],"use":[90],"REINFORCE":[92],"trick":[93],"to":[94,119,147],"train":[95],"ASR":[96,125,149],"models":[99,152],"with":[100],"loss.":[102],"We":[103,134],"show":[104],"that":[105],"custom":[106],"loss":[108,141],"training":[109,142],"is":[110],"state-of-the-art":[112],"open":[114],"datasets":[116],"leads":[118],"6%":[120],"relative":[121],"improvement":[122],"in":[123],"both":[124],"NLU":[127],"large":[131],"proprietary":[132],"datasets.":[133],"also":[135],"demonstrate":[136],"how":[137],"paradigm":[143],"can":[144],"be":[145],"used":[146],"update":[148],"without":[153],"transcripts,":[154],"using":[155],"feedback":[157],"alone.":[158]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
