{"id":"https://openalex.org/W3015797571","doi":"https://doi.org/10.1109/icassp40776.2020.9054417","title":"Improved End-To-End Spoken Utterance Classification with a Self-Attention Acoustic Classifier","display_name":"Improved End-To-End Spoken Utterance Classification with a Self-Attention Acoustic Classifier","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015797571","doi":"https://doi.org/10.1109/icassp40776.2020.9054417","mag":"3015797571"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9054417","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054417","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028120017","display_name":"Ryan Price","orcid":"https://orcid.org/0000-0003-0624-6629"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ryan Price","raw_affiliation_strings":["Interactions Research"],"affiliations":[{"raw_affiliation_string":"Interactions Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060747272","display_name":"Mahnoosh Mehrabani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mahnoosh Mehrabani","raw_affiliation_strings":["Interactions Research"],"affiliations":[{"raw_affiliation_string":"Interactions Research","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111475783","display_name":"Srinivas Bangalore","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Srinivas Bangalore","raw_affiliation_strings":["Interactions Research"],"affiliations":[{"raw_affiliation_string":"Interactions Research","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5028120017"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.5907,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.86576993,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"15","issue":null,"first_page":"8504","last_page":"8508"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7993043661117554},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.7973200082778931},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.7264606356620789},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6663411855697632},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.5461065769195557},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken language","score":0.5028886198997498},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4798033535480499},{"id":"https://openalex.org/keywords/conjunction","display_name":"Conjunction (astronomy)","score":0.43626269698143005},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3871632218360901}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7993043661117554},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.7973200082778931},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.7264606356620789},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6663411855697632},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.5461065769195557},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.5028886198997498},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4798033535480499},{"id":"https://openalex.org/C59656382","wikidata":"https://www.wikidata.org/wiki/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.43626269698143005},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3871632218360901},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9054417","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9054417","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8299999833106995}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W6908809","https://openalex.org/W648947103","https://openalex.org/W854541894","https://openalex.org/W1600744878","https://openalex.org/W1614298861","https://openalex.org/W1649407914","https://openalex.org/W1832693441","https://openalex.org/W2084339293","https://openalex.org/W2095705004","https://openalex.org/W2127141656","https://openalex.org/W2149980590","https://openalex.org/W2170240176","https://openalex.org/W2170919426","https://openalex.org/W2193413348","https://openalex.org/W2407080277","https://openalex.org/W2597655663","https://openalex.org/W2745969942","https://openalex.org/W2786839803","https://openalex.org/W2891229414","https://openalex.org/W2891367150","https://openalex.org/W2894164357","https://openalex.org/W2902864383","https://openalex.org/W2950577311","https://openalex.org/W2962949994","https://openalex.org/W2963012544","https://openalex.org/W2963288440","https://openalex.org/W2964108264","https://openalex.org/W2972584841","https://openalex.org/W6600284362","https://openalex.org/W6623517193","https://openalex.org/W6636510571","https://openalex.org/W6674330103","https://openalex.org/W6685053522","https://openalex.org/W6687566353","https://openalex.org/W6735377749","https://openalex.org/W6756373434"],"related_works":["https://openalex.org/W1573992054","https://openalex.org/W1599690842","https://openalex.org/W2753053412","https://openalex.org/W2665157442","https://openalex.org/W3108840034","https://openalex.org/W4388169484","https://openalex.org/W2363259562","https://openalex.org/W3036937347","https://openalex.org/W3149224203","https://openalex.org/W2284708545"],"abstract_inverted_index":{"While":[0],"human":[1,118],"language":[2],"provides":[3],"a":[4,19,24,33,48,95],"natural":[5],"interface":[6],"for":[7,51,76],"humanmachine":[8],"communication,":[9],"there":[10],"are":[11,110],"several":[12],"challenges":[13],"concerning":[14],"extracting":[15],"the":[16,29,60,73,87],"intents":[17],"of":[18,62],"speaker":[20,30],"when":[21,28,102],"interacting":[22],"with":[23,68,89,117],"virtual":[25],"agent,":[26],"especially":[27],"is":[31,114],"in":[32,66],"noisy":[34],"acoustic":[35,69,90,103],"environment,":[36],"that":[37,80,113],"still":[38],"remains":[39],"to":[40,94],"be":[41,84,121],"solved.":[42],"In":[43],"this":[44],"paper,":[45],"we":[46],"propose":[47],"new":[49],"architecture":[50],"end-to-end":[52,74],"spoken":[53],"utterance":[54],"classification":[55],"(SUC)":[56],"and":[57,104],"also":[58],"explore":[59],"impact":[61],"leveraging":[63],"lexical":[64,105],"information":[65,70],"conjunction":[67],"obtained":[71,85],"from":[72,107],"model":[75,88],"SUC.":[77],"We":[78],"demonstrate":[79],"strong":[81],"performance":[82],"can":[83,120],"by":[86],"features":[91],"alone":[92],"compared":[93],"text":[96],"classifier":[97],"on":[98,115],"ASR":[99],"outputs.":[100],"Furthermore,":[101],"embeddings":[106],"these":[108],"classifiers":[109],"combined,":[111],"accuracy":[112],"par":[116],"agents":[119],"achieved.":[122]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
