{"id":"https://openalex.org/W4372260554","doi":"https://doi.org/10.1109/icassp49357.2023.10095434","title":"Improving Contextual Spelling Correction by External Acoustics Attention and Semantic Aware Data Augmentation","display_name":"Improving Contextual Spelling Correction by External Acoustics Attention and Semantic Aware Data Augmentation","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372260554","doi":"https://doi.org/10.1109/icassp49357.2023.10095434"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095434","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095434","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100626793","display_name":"Xiaoqiang Wang","orcid":"https://orcid.org/0000-0001-6096-8011"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xiaoqiang Wang","raw_affiliation_strings":["Microsoft Corporation,Redmond,WA,US","Microsoft Corporation, Redmond, WA, US"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation,Redmond,WA,US","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, US","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100360935","display_name":"Yanqing Liu","orcid":"https://orcid.org/0000-0003-0412-8805"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanqing Liu","raw_affiliation_strings":["Microsoft Corporation,Redmond,WA,US","Microsoft Corporation, Redmond, WA, US"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation,Redmond,WA,US","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, US","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100365053","display_name":"Jinyu Li","orcid":"https://orcid.org/0000-0002-1089-9748"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinyu Li","raw_affiliation_strings":["Microsoft Corporation,Redmond,WA,US","Microsoft Corporation, Redmond, WA, US"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation,Redmond,WA,US","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, US","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100329353","display_name":"Sheng Zhao","orcid":"https://orcid.org/0000-0002-9624-5381"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sheng Zhao","raw_affiliation_strings":["Microsoft Corporation,Redmond,WA,US","Microsoft Corporation, Redmond, WA, US"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation,Redmond,WA,US","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, US","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100626793"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":0.5882,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.69684815,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.787521243095398},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6661695837974548},{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.6589603424072266},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6077343225479126},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.5130547285079956},{"id":"https://openalex.org/keywords/spelling","display_name":"Spelling","score":0.5104771852493286},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5044466257095337},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4797819256782532},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.42345383763313293},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.4109649658203125},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2927013039588928},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10240727663040161}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.787521243095398},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6661695837974548},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.6589603424072266},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6077343225479126},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.5130547285079956},{"id":"https://openalex.org/C2777801307","wikidata":"https://www.wikidata.org/wiki/Q2088390","display_name":"Spelling","level":2,"score":0.5104771852493286},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5044466257095337},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4797819256782532},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.42345383763313293},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.4109649658203125},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2927013039588928},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10240727663040161},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095434","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095434","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6299999952316284}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1821462560","https://openalex.org/W2133564696","https://openalex.org/W2402040300","https://openalex.org/W2886319145","https://openalex.org/W2889012072","https://openalex.org/W2916997151","https://openalex.org/W2937402758","https://openalex.org/W2962760690","https://openalex.org/W2972625221","https://openalex.org/W3011339933","https://openalex.org/W3094667432","https://openalex.org/W3096815019","https://openalex.org/W3097777922","https://openalex.org/W3097794466","https://openalex.org/W3140235797","https://openalex.org/W3141464856","https://openalex.org/W3161873870","https://openalex.org/W3193959417","https://openalex.org/W3198004110","https://openalex.org/W3205495812","https://openalex.org/W3211278025","https://openalex.org/W4221165942","https://openalex.org/W4224917454","https://openalex.org/W4226292626","https://openalex.org/W4226302523","https://openalex.org/W4226462878","https://openalex.org/W4395957972","https://openalex.org/W6638523607","https://openalex.org/W6679434410","https://openalex.org/W6761263335","https://openalex.org/W6777337586"],"related_works":["https://openalex.org/W2100947578","https://openalex.org/W2161008081","https://openalex.org/W1555832326","https://openalex.org/W4298186509","https://openalex.org/W2556702969","https://openalex.org/W217221262","https://openalex.org/W611030372","https://openalex.org/W1974418053","https://openalex.org/W2081317458","https://openalex.org/W2021532426"],"abstract_inverted_index":{"We":[0],"previously":[1],"proposed":[2],"contextual":[3,20,140],"spelling":[4,136],"correction":[5,137],"(CSC)":[6],"to":[7,48,69,148,173,195,203,234],"correct":[8,70],"the":[9,34,64,86,94,150,197,206,212,216,235],"output":[10],"of":[11,58,98],"end-to-end":[12],"(E2E)":[13],"automatic":[14],"speech":[15],"recognition":[16],"(ASR)":[17],"models":[18],"with":[19,72,162],"information":[21,49,161],"such":[22],"as":[23,166,168,221,223],"name,":[24],"place,":[25],"etc.":[26],"Although":[27],"CSC":[28,65,152,172,237],"has":[29],"achieved":[30],"reasonable":[31],"improvement":[32,232],"in":[33,51,85,103,109,127,142,192],"biasing":[35,80,141,207],"problem,":[36],"there":[37,89,111],"are":[38,82],"still":[39],"two":[40,155],"drawbacks":[41],"for":[42,139],"further":[43,204],"accuracy":[44],"improvement.":[45],"First,":[46],"due":[47],"limitation":[50],"text":[52,169],"only":[53],"hypothesis":[54],"or":[55,75,180],"weak":[56],"performance":[57],"ASR":[59,146],"model":[60,66,138,153],"on":[61],"rare":[62],"domains,":[63],"may":[67,112],"fail":[68],"phrases":[71,81],"similar":[73],"pronunciation":[74],"anti-context":[76],"cases":[77],"where":[78],"all":[79],"not":[83],"present":[84],"utterance.":[87],"Second,":[88],"is":[90,105],"a":[91,186],"discrepancy":[92],"between":[93,116,199],"training":[95,104,193,200],"and":[96,120,201,229],"inference":[97,110,202],"CSC.":[99],"The":[100],"bias":[101,241],"list":[102,242],"randomly":[106],"selected":[107],"but":[108],"be":[113],"more":[114],"similarity":[115],"ground":[117],"truth":[118],"phrase":[119,177,194],"other":[121],"phrases.":[122,182],"To":[123],"solve":[124],"above":[125],"limitations,":[126],"this":[128],"paper":[129],"we":[130,158,184],"propose":[131],"an":[132,163],"improved":[133,213],"non-autoregressive":[134],"(NAR)":[135],"E2E":[143],"neural":[144],"transducer-based":[145],"systems":[147],"improve":[149],"previous":[151,236],"from":[154,178],"perspectives:":[156],"Firstly,":[157],"incorporate":[159],"acoustics":[160],"external":[164],"attention":[165],"well":[167],"hypotheses":[170],"into":[171],"better":[174],"distinguish":[175],"target":[176],"dissimilar":[179],"irrelevant":[181],"Secondly,":[183],"design":[185],"semantic":[187],"aware":[188],"data":[189],"augmentation":[190],"schema":[191],"reduce":[196],"mismatch":[198],"boost":[205],"accuracy.":[208],"Experiments":[209],"show":[210],"that":[211],"method":[214,238],"outperforms":[215],"baseline":[217],"ASR+Biasing":[218],"system":[219],"by":[220],"much":[222],"20.3%":[224],"relative":[225],"name":[226,243],"recall":[227],"gain":[228],"achieves":[230],"stable":[231],"compared":[233],"over":[239],"different":[240],"coverage":[244],"ratio.":[245]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
