{"id":"https://openalex.org/W4226507200","doi":"https://doi.org/10.1109/icassp43922.2022.9746086","title":"Improving End-to-end Models for Set Prediction in Spoken Language Understanding","display_name":"Improving End-to-end Models for Set Prediction in Spoken Language Understanding","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4226507200","doi":"https://doi.org/10.1109/icassp43922.2022.9746086"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746086","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746086","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110252428","display_name":"Hong-Kwang Jeff Kuo","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hong-Kwang J. Kuo","raw_affiliation_strings":["IBM Research AI"],"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047353872","display_name":"Zolt\u00e1n T\u00fcske","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zoltan Tuske","raw_affiliation_strings":["IBM Research AI"],"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101787514","display_name":"Samuel Thomas","orcid":"https://orcid.org/0000-0001-7573-0620"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Samuel Thomas","raw_affiliation_strings":["IBM Research AI"],"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003725957","display_name":"Brian Kingsbury","orcid":"https://orcid.org/0000-0002-1343-6837"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brian Kingsbury","raw_affiliation_strings":["IBM Research AI"],"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079994647","display_name":"George Saon","orcid":"https://orcid.org/0009-0004-6837-5009"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"George Saon","raw_affiliation_strings":["IBM Research AI"],"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5110252428"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02881922,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7162","last_page":"7166"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8665642738342285},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken language","score":0.8266226053237915},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6413758397102356},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6364484429359436},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5835041999816895},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5722807049751282},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5619560480117798},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.5569731593132019},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5512644648551941},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5366958975791931},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5073298811912537}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8665642738342285},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.8266226053237915},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6413758397102356},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6364484429359436},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5835041999816895},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5722807049751282},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5619560480117798},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.5569731593132019},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5512644648551941},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5366958975791931},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5073298811912537},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746086","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746086","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7099999785423279,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W854541894","https://openalex.org/W1828163288","https://openalex.org/W2077302143","https://openalex.org/W2137871902","https://openalex.org/W2166293310","https://openalex.org/W2894164357","https://openalex.org/W2899771611","https://openalex.org/W2908510526","https://openalex.org/W2914417638","https://openalex.org/W2962760690","https://openalex.org/W2962784628","https://openalex.org/W2963050422","https://openalex.org/W2963414781","https://openalex.org/W2972327934","https://openalex.org/W2972584841","https://openalex.org/W2973040747","https://openalex.org/W2973127116","https://openalex.org/W2973229104","https://openalex.org/W3002595344","https://openalex.org/W3006901707","https://openalex.org/W3007227084","https://openalex.org/W3015686596","https://openalex.org/W3016006013","https://openalex.org/W3016262400","https://openalex.org/W3095552229","https://openalex.org/W3096249532","https://openalex.org/W3097747488","https://openalex.org/W3097777922","https://openalex.org/W3097964672","https://openalex.org/W3161000786","https://openalex.org/W3162875390","https://openalex.org/W3163300396","https://openalex.org/W3174321708","https://openalex.org/W3198654230","https://openalex.org/W4287596060","https://openalex.org/W4293714597","https://openalex.org/W6623517193","https://openalex.org/W6638749077","https://openalex.org/W6714142977","https://openalex.org/W6756040250","https://openalex.org/W6757817989","https://openalex.org/W6785224828"],"related_works":["https://openalex.org/W3179968364","https://openalex.org/W1999612375","https://openalex.org/W2938107654","https://openalex.org/W4390516098","https://openalex.org/W2151749779","https://openalex.org/W3008587939","https://openalex.org/W2181948922","https://openalex.org/W2384362569","https://openalex.org/W2916997151","https://openalex.org/W2949174760"],"abstract_inverted_index":{"The":[0],"goal":[1],"of":[2,13,67],"spoken":[3,91,100,122],"language":[4],"understanding":[5],"(SLU)":[6],"systems":[7],"is":[8,62,88,102],"to":[9,23,37,47,119],"determine":[10],"the":[11,14,84,121],"meaning":[12],"input":[15],"speech":[16,19,31],"signal,":[17],"unlike":[18],"recognition":[20],"which":[21,43],"aims":[22],"produce":[24],"verbatim":[25,50],"transcripts.":[26,51],"Advances":[27],"in":[28,90],"end-to-end":[29],"(E2E)":[30],"modeling":[32],"have":[33],"made":[34],"it":[35],"possible":[36],"train":[38],"solely":[39],"on":[40,54],"semantic":[41],"entities,":[42],"are":[44],"far":[45],"cheaper":[46],"collect":[48],"than":[49,130],"We":[52],"focus":[53],"this":[55],"set":[56],"prediction":[57],"problem,":[58],"where":[59],"entity":[60,86,99],"order":[61,101],"unspecified.":[63],"Using":[64],"two":[65],"classes":[66],"E2E":[68,95],"models,":[69,142],"RNN":[70],"transducers":[71],"and":[72,134],"attention":[73,115,138],"based":[74,116,139],"encoder-decoders,":[75],"we":[76,104],"show":[77],"that":[78],"these":[79],"models":[80,97],"work":[81],"best":[82],"when":[83,98],"training":[85],"sequence":[87],"arranged":[89],"order.":[92,123],"To":[93],"improve":[94],"SLU":[96,141],"unknown,":[103],"propose":[105],"a":[106],"novel":[107],"data":[108],"augmentation":[109],"technique":[110],"along":[111],"with":[112],"an":[113],"implicit":[114],"alignment":[117],"method":[118],"infer":[120],"F1":[124],"scores":[125],"significantly":[126],"increased":[127],"by":[128],"more":[129],"11%":[131],"for":[132,137],"RNN-T":[133],"about":[135],"2%":[136],"encoder-decoder":[140],"outperforming":[143],"previously":[144],"reported":[145],"results.":[146]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
