{"id":"https://openalex.org/W4221142717","doi":"https://doi.org/10.1109/icassp43922.2022.9747198","title":"Building Robust Spoken Language Understanding by Cross Attention Between Phoneme Sequence and ASR Hypothesis","display_name":"Building Robust Spoken Language Understanding by Cross Attention Between Phoneme Sequence and ASR Hypothesis","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4221142717","doi":"https://doi.org/10.1109/icassp43922.2022.9747198"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9747198","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747198","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068267703","display_name":"Zexun Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zexun Wang","raw_affiliation_strings":["JD AI,Beijing,China","JD AI, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD AI,Beijing,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"JD AI, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034405338","display_name":"Yuquan Le","orcid":"https://orcid.org/0000-0001-6283-9037"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]},{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuquan Le","raw_affiliation_strings":["JD AI,Beijing,China","Hunan University","JD AI, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD AI,Beijing,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"Hunan University","institution_ids":["https://openalex.org/I16609230"]},{"raw_affiliation_string":"JD AI, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100434324","display_name":"Yi Zhu","orcid":"https://orcid.org/0000-0003-3000-3918"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]},{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"Yi Zhu","raw_affiliation_strings":["JD AI,Beijing,China","JD AI, Beijing, China","LTL, University of Cambridge"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD AI,Beijing,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"JD AI, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"LTL, University of Cambridge","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100599483","display_name":"Yuming Zhao","orcid":"https://orcid.org/0009-0004-8445-8365"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuming Zhao","raw_affiliation_strings":["JD AI,Beijing,China","JD AI, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD AI,Beijing,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"JD AI, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076923788","display_name":"Mingchao Feng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingchao Feng","raw_affiliation_strings":["JD AI,Beijing,China","JD AI, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD AI,Beijing,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"JD AI, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115621122","display_name":"Meng Chen","orcid":"https://orcid.org/0009-0006-9908-4524"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Chen","raw_affiliation_strings":["JD AI,Beijing,China","JD AI, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD AI,Beijing,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"JD AI, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101727205","display_name":"Xiaodong He","orcid":"https://orcid.org/0000-0002-9463-9168"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaodong He","raw_affiliation_strings":["JD AI,Beijing,China","JD AI, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD AI,Beijing,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"JD AI, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2076,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.37598728,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"7147","last_page":"7151"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8545339703559875},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken language","score":0.6753944158554077},{"id":"https://openalex.org/keywords/complementarity","display_name":"Complementarity (molecular biology)","score":0.6152438521385193},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5747038722038269},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5726746916770935},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5580376982688904},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48581671714782715},{"id":"https://openalex.org/keywords/natural-language-understanding","display_name":"Natural language understanding","score":0.4847692847251892},{"id":"https://openalex.org/keywords/confusion","display_name":"Confusion","score":0.4124469757080078},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.34313708543777466}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8545339703559875},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.6753944158554077},{"id":"https://openalex.org/C202269582","wikidata":"https://www.wikidata.org/wiki/Q2644277","display_name":"Complementarity (molecular biology)","level":2,"score":0.6152438521385193},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5747038722038269},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5726746916770935},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5580376982688904},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48581671714782715},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.4847692847251892},{"id":"https://openalex.org/C2781140086","wikidata":"https://www.wikidata.org/wiki/Q557945","display_name":"Confusion","level":2,"score":0.4124469757080078},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.34313708543777466},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C11171543","wikidata":"https://www.wikidata.org/wiki/Q41630","display_name":"Psychoanalysis","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9747198","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747198","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.699999988079071}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2024632416","https://openalex.org/W2064675550","https://openalex.org/W2137871902","https://openalex.org/W2145077480","https://openalex.org/W2189256702","https://openalex.org/W2511962886","https://openalex.org/W2594395650","https://openalex.org/W2803392141","https://openalex.org/W2890817076","https://openalex.org/W2933022734","https://openalex.org/W2963288440","https://openalex.org/W2972545188","https://openalex.org/W2972584841","https://openalex.org/W2980347982","https://openalex.org/W2999245659","https://openalex.org/W3015747801","https://openalex.org/W3034931686","https://openalex.org/W3035676545","https://openalex.org/W3096926041","https://openalex.org/W3136178271","https://openalex.org/W3161302809","https://openalex.org/W3197744084","https://openalex.org/W6631190155","https://openalex.org/W6680709282","https://openalex.org/W6767471572","https://openalex.org/W6772693987","https://openalex.org/W6791205002"],"related_works":["https://openalex.org/W3174008653","https://openalex.org/W2765804957","https://openalex.org/W2962716343","https://openalex.org/W4288099861","https://openalex.org/W2893411096","https://openalex.org/W43702919","https://openalex.org/W4213400064","https://openalex.org/W4288263119","https://openalex.org/W4390690247","https://openalex.org/W2139439365"],"abstract_inverted_index":{"Building":[0],"Spoken":[1],"Language":[2],"Understanding":[3],"(SLU)":[4],"robust":[5,141],"to":[6,71,83],"Automatic":[7],"Speech":[8],"Recognition":[9],"(ASR)":[10],"errors":[11,25,102],"is":[12,69],"an":[13],"essential":[14],"issue":[15],"for":[16,60,98],"various":[17],"voice-enabled":[18],"virtual":[19],"assistants.":[20],"Considering":[21],"that":[22],"most":[23],"ASR":[24,43,101],"are":[26,112],"caused":[27],"by":[28],"phonetic":[29,90],"confusion":[30],"between":[31,76],"similar-sounding":[32],"expressions,":[33],"intuitively,":[34],"leveraging":[35],"the":[36,47,73,85,89,100,118,129],"phoneme":[37,77],"sequence":[38],"of":[39,49,94,122,131],"speech":[40],"can":[41],"complement":[42],"hypothesis":[44],"and":[45,78,91,97,120,133],"enhance":[46],"robustness":[48],"SLU.":[50],"This":[51],"paper":[52],"proposes":[53],"a":[54],"novel":[55],"model":[56],"with":[57,139],"Cross":[58],"Attention":[59],"SLU":[61,142],"(denoted":[62],"as":[63],"CASLU).":[64],"The":[65],"cross":[66],"attention":[67],"block":[68],"devised":[70],"catch":[72,88],"fine-grained":[74],"interactions":[75],"word":[79],"embeddings":[80],"in":[81,103],"order":[82],"make":[84],"joint":[86],"representations":[87],"semantic":[92],"features":[93],"input":[95],"simultaneously":[96],"overcoming":[99],"downstream":[104],"natural":[105],"language":[106],"understanding":[107],"(NLU)":[108],"tasks.":[109],"Extensive":[110],"experiments":[111],"conducted":[113],"on":[114],"three":[115],"datasets,":[116],"showing":[117],"effectiveness":[119],"competitiveness":[121],"our":[123],"approach.":[124],"Additionally,":[125],"We":[126],"also":[127],"validate":[128],"universality":[130],"CASLU":[132],"prove":[134],"its":[135],"complementarity":[136],"when":[137],"combining":[138],"other":[140],"techniques.":[143]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
